Example #1
    def request(self, url, params={}, headers={}):
        """
        Retrieve SDMX messages.
        If needed, override in subclasses to support other data providers.

        :param url: The URL of the message.
        :type url: str
        :param params: query parameters to append to the URL
        :type params: dict
        :param headers: HTTP headers merged into the configured defaults
        :type headers: dict
        :return: the XML data as a file-like object
        """
        # Generate current config. Merge in any given headers
        cur_config = self.config.copy()
        if 'headers' in cur_config:
            cur_config['headers'] = cur_config['headers'].copy()
            cur_config['headers'].update(headers)
        else:
            cur_config['headers'] = headers

        with closing(requests.get(url, params=params, **cur_config)) as response:
            if response.status_code == requests.codes.OK:
                source = STF(max_size=self.max_size)
                for c in response.iter_content(chunk_size=1000000):
                    source.write(c)

            else:
                source = None
            code = int(response.status_code)
            if 400 <= code <= 499:
                response.raise_for_status()  # raises requests.HTTPError for 4xx
            return source, response.url, response.headers, code
Example #2
    def run(self, opts):
        from lzma.xz import compress

        self.h = sha1()
        tdir = mkdtemp("calibre-mathjax-build")
        try:
            src = opts.path_to_mathjax or self.download_mathjax_release(tdir, opts.mathjax_url)
            self.info("Compressing MathJax...")
            t = SpooledTemporaryFile()
            with ZipFile(t, "w", ZIP_STORED) as zf:
                self.add_file(zf, self.j(src, "unpacked", "MathJax.js"), "MathJax.js")
                self.add_tree(
                    zf,
                    self.j(src, "fonts", "HTML-CSS", self.FONT_FAMILY, "woff"),
                    "fonts/HTML-CSS/%s/woff" % self.FONT_FAMILY,
                )
                for d in "extensions jax/element jax/input jax/output/CommonHTML".split():
                    self.add_tree(zf, self.j(src, "unpacked", *d.split("/")), d)

                zf.comment = self.h.hexdigest()
            t.seek(0)
            with open(self.j(self.RESOURCES, "content-server", "mathjax.zip.xz"), "wb") as f:
                compress(t, f, level=9)
            with open(self.j(self.RESOURCES, "content-server", "mathjax.version"), "wb") as f:
                f.write(zf.comment)
        finally:
            shutil.rmtree(tdir)
Example #3
    def send(self, request, stream=None, timeout=None, verify=None, cert=None, proxies=None):
        pathname = url_to_path(request.url)

        resp = Response()
        resp.status_code = 200
        resp.url = request.url

        try:
            stats = lstat(pathname)
        except (IOError, OSError) as exc:
            resp.status_code = 404
            message = {
                "error": "file does not exist",
                "path": pathname,
                "exception": repr(exc),
            }
            fh = SpooledTemporaryFile()
            fh.write(ensure_binary(json.dumps(message)))
            fh.seek(0)
            resp.raw = fh
            resp.close = resp.raw.close
        else:
            modified = formatdate(stats.st_mtime, usegmt=True)
            content_type = guess_type(pathname)[0] or "text/plain"
            resp.headers = CaseInsensitiveDict({
                "Content-Type": content_type,
                "Content-Length": stats.st_size,
                "Last-Modified": modified,
            })

            resp.raw = open(pathname, "rb")
            resp.close = resp.raw.close
        return resp
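
This send() method implements a file:// transport adapter for requests. A sketch of how such an adapter is typically registered (the class name LocalFileAdapter is an assumption, it is not shown in the snippet):

import requests

session = requests.Session()
session.mount('file://', LocalFileAdapter())  # hypothetical adapter class
resp = session.get('file:///etc/hosts')
print(resp.status_code, resp.headers.get('Content-Type'))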
Example #4
def gen_data(location=None, **kwargs):
    """Fetches realtime data and generates records"""
    url = '%s/%s' % (kwargs['BASE_URL'], location)
    r = requests.get(url)
    f = SpooledTemporaryFile()  # wrap to access `fileno`
    f.write(r.content)
    f.seek(0)  # rewind so the reader starts at the beginning
    return io.read_xls(f, sanitize=True, encoding=r.encoding)
Example #5
def fetch_data(config):
    """Fetches realtime data and generates records"""
    ckan = CKAN(config['ENDPOINT'], apikey=config['API_KEY'])
    # r = ckan.fetch_resource(config['RID'])  # if using ckanutils
    resource = ckan.action.resource_show(id=config['RID'])
    url = resource.get('perma_link') or resource.get('url')
    r = requests.get(url, stream=True)

    if any('403' in h.headers.get('x-ckan-error', '') for h in r.history):
        raise NotAuthorized(
            'Access to fetch resource %s was denied.' % config['RID'])

    try:
        ext = splitext(url)[1].split('.')[1]
    except IndexError:
        ext = cv.ctype2ext(r.headers['Content-Type'])

    if ext == 'csv':
        records = io.read_csv(r.raw, sanitize=True, encoding=r.encoding)
    elif ext in {'xls', 'xlsx'}:
        r = requests.get(url)
        f = SpooledTemporaryFile()
        f.write(r.content)
        records = io.read_xls(f, sanitize=True, encoding=r.encoding)
    else:
        msg = 'Filetype `%s` unsupported. ' % ext
        msg += 'Please view tabutils.io documentation for assistance.'
        raise TypeError(msg)

    constraints = [('adm0_name', 'a'), ('mp_month', '3'), ('mp_year', '2015')]

    filterer = lambda x: all(x[k].lower().startswith(v) for k, v in constraints)
    return it.ifilter(filterer, records)
Example #6
def string2spool(input_string):
    """Takes a string as an argument and returns an open file handle with the
    contents of the string"""
    file_object = SpooledTemporaryFile()
    file_object.write(input_string)
    file_object.seek(0)
    return file_object
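
Note that SpooledTemporaryFile defaults to binary mode, so on Python 3 this helper only works for bytes. A variant that accepts str as well might look like this (the utf-8 encoding is an assumption):

def string2spool(input_string):
    """Return an open file handle containing the given str or bytes."""
    file_object = SpooledTemporaryFile()
    if isinstance(input_string, str):
        input_string = input_string.encode('utf-8')  # assumed encoding
    file_object.write(input_string)
    file_object.seek(0)
    return file_object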
Example #7
 def create_dump(self):
     if not self.connection.is_usable():
         self.connection.connect()
     dump_file = SpooledTemporaryFile(max_size=10 * 1024 * 1024)
     self._write_dump(dump_file)
     dump_file.seek(0)
     return dump_file
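
The max_size argument is what keeps small dumps in memory: SpooledTemporaryFile buffers writes in RAM until max_size bytes, then transparently rolls over to a real on-disk temporary file. A minimal sketch of that behavior (the sizes are illustrative):

from tempfile import SpooledTemporaryFile

buf = SpooledTemporaryFile(max_size=1024)
buf.write(b'x' * 512)    # still held in memory
buf.write(b'x' * 1024)   # total now exceeds max_size, spills to disk
buf.seek(0)
assert buf.read(4) == b'xxxx'  # same file API either way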
Example #8
 def push_index(self):
   stream = SpooledTemporaryFile(max_size=20 * MB)
   pointers = 0
   stream.write(struct.pack(OFFSET_FMT, pointers))
   self.indexes.append([
     stream, pointers, self.block_size - self.pointer_size
   ])
Example #9
    def __init__(self, data=None, fp=None, length=-1):
        assert bool(data is not None) ^ bool(fp)

        if length == -1:
            if data is not None:
                length = len(data)
            else:
                length = get_size(fp)  # can be -1

        # We allow writer reuse, but if we're working with a stream, we cannot
        # seek. Copy the data to a tempfile.
        if fp and not can_seek(fp):
            newfp = SpooledTemporaryFile(MAX_INMEMORY_SIZE)
            sendfile(newfp, fp)
            length = newfp.tell()
            newfp.seek(0)
            fp = newfp

        self.data = data
        self.fp = fp
        self.fpreads = 0  # keep track of fp usage
        self.length = length

        assert length >= 0
        self.use_tempfile = length > MAX_INMEMORY_SIZE
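
The can_seek and sendfile helpers are assumed by this constructor. If the project did not provide them, plausible implementations could be:

def can_seek(fp):
    # Probe for random access; sockets and pipes fail this check.
    try:
        fp.seek(fp.tell())
        return True
    except (AttributeError, OSError):
        return False

def sendfile(dest, src, chunk_size=64 * 1024):
    # Copy src to dest in bounded chunks so large streams never
    # need to fit in memory at once.
    while True:
        chunk = src.read(chunk_size)
        if not chunk:
            break
        dest.write(chunk)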
Example #10
    def close(self):
        """Send the change to the DFS, and close the file."""

        self.flush()

        if 'c' not in self.mode:
            SpooledTemporaryFile.close(self)
Example #11
class GoogleCloudFile(File):
    def __init__(self, name, mode, storage):
        self.name = name
        self.mime_type = mimetypes.guess_type(name)[0]
        self._mode = mode
        self._storage = storage
        # NOTE(mattrobenolt): This is the same change in behavior as in
        # the s3 backend. We're opting not to load the file
        # or metadata at this step. This means we won't actually
        # know a file doesn't exist until we try to read it.
        self.blob = FancyBlob(storage.download_url, self.name, storage.bucket)
        self._file = None
        self._is_dirty = False

    @property
    def size(self):
        return self.blob.size

    def _get_file(self):
        if self._file is None:
            with metrics.timer('filestore.read', instance='gcs'):
                self._file = SpooledTemporaryFile(
                    max_size=self._storage.max_memory_size,
                    suffix=".GSStorageFile",
                    dir=None,
                )
                if 'r' in self._mode:
                    self._is_dirty = False
                    self.blob.download_to_file(self._file)
                    self._file.seek(0)
        return self._file

    def _set_file(self, value):
        self._file = value

    file = property(_get_file, _set_file)

    def read(self, num_bytes=None):
        if 'r' not in self._mode:
            raise AttributeError("File was not opened in read mode.")

        if num_bytes is None:
            num_bytes = -1

        return super(GoogleCloudFile, self).read(num_bytes)

    def write(self, content):
        if 'w' not in self._mode:
            raise AttributeError("File was not opened in write mode.")
        self._is_dirty = True
        return super(GoogleCloudFile, self).write(force_bytes(content))

    def close(self):
        if self._file is not None:
            if self._is_dirty:
                self.file.seek(0)
                self.blob.upload_from_file(self.file, content_type=self.mime_type)
            self._file.close()
            self._file = None
Example #12
    def _open(self, name, mode = 'rb') -> File:
        name = self._transform_name(name)
        content = self.service.get_blob_content(self.container, name)
        file = SpooledTemporaryFile()
        file.write(content)
        file.seek(0) # explicitly reset to allow reading from the beginning afterwards as-is

        return File(file)
Example #13
 def start(self, args):
     self.outFile = SpooledTemporaryFile()
     self.errFile = SpooledTemporaryFile()
     self.cmdline = list2cmdline(args)
     print('starting: ' + self.cmdline)
     self.process = Popen(args,
         stderr=self.errFile, stdout=self.outFile, universal_newlines=False)
     self.process_start = time()
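
Because stdout and stderr are captured in spooled files, they must be rewound before reading once the process exits. A hedged companion helper (get_output is hypothetical, not part of the original class):

 def get_output(self):
     # hypothetical helper: collect captured output after the process exits
     self.process.wait()
     self.outFile.seek(0)
     self.errFile.seek(0)
     return self.outFile.read(), self.errFile.read()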
Example #14
    def generate(self):
        points = self.points()
        
        self.buffer = 2*self.pad
        count = np.zeros([x + 2*self.buffer for x in self.expanded_size])
        density = np.zeros([x + 2*self.buffer for x in self.expanded_size])
        
        # Render the B&W density version of the heatmap
        dot_size = self.dot.shape[0]
        for x, y, weight in points:
            x1 = x + self.buffer - (dot_size - 1)//2
            y1 = y + self.buffer - (dot_size - 1)//2
            count[y1:(y1 + dot_size),
                x1:(x1 + dot_size)] += self.dot
            density[y1:(y1 + dot_size),
                x1:(x1 + dot_size)] += self.dot*float(weight)

        # Pick the field to map
        if gheat_settings.GHEAT_MAP_MODE == gheat_settings.GHEAT_MAP_MODE_COUNT:
            img = count
            #opacity = np.zeros(img.shape()) + 255
        elif gheat_settings.GHEAT_MAP_MODE == gheat_settings.GHEAT_MAP_MODE_SUM_DENSITY:
            img = density
            #opacity = np.clip(count, 0, gheat_settings.GHEAT_OPACITY_LIMIT)
        elif gheat_settings.GHEAT_MAP_MODE == gheat_settings.GHEAT_MAP_MODE_MEAN_DENSITY:
            img = density
            img[count > 0] /= count[count > 0]
            #opacity = np.clip(count, 0, gheat_settings.GHEAT_OPACITY_LIMIT)
        else:
            raise ValueError('Unknown map mode')
            
        # Crop resulting density image (which could have grown) into the
        # actual canvas size we want
        img = img[(self.pad + self.buffer):(SIZE + self.pad + self.buffer), 
            (self.pad + self.buffer):(SIZE + self.pad + self.buffer)]
        #opacity = opacity[self.pad:(SIZE + self.pad), self.pad:(SIZE + self.pad)]


        # Maybe use a logarithm
        img = np.where(img>0, np.log(img)+1, img)

        # Convert to a 0 to 255 image
        img = np.clip(256.0*np.power(img/gheat_settings.GHEAT_MAX_VALUE, 
            gheat_settings.GHEAT_SCALING_COEFFICIENT), 0, 255.999).astype('uint8')
        

        # Given the B&W density image, generate a color heatmap based on
        # this Tile's color scheme.
        colour_image = np.zeros((SIZE, SIZE, 4), 'uint8') + 255
        for i in range(3):
            colour_image[:,:,i] = self.schemeobj.colors[:,i][255 - img]
        colour_image[:,:,3] = np.where(img > gheat_settings.GHEAT_MIN_DENSITY, 255, 0)

        tmpfile = SpooledTemporaryFile()
        writer = png.Writer(SIZE, SIZE, alpha=True, bitdepth=8)
        writer.write(tmpfile, np.reshape(colour_image, (SIZE, SIZE*4)))
        tmpfile.seek(0)
        return tmpfile
Example #15
File: utils.py Project: dirn/depot
def file_from_content(content):
    f = content
    if isinstance(content, cgi.FieldStorage):
        f = content.file
    elif isinstance(content, byte_string):
        f = SpooledTemporaryFile(INMEMORY_FILESIZE)
        f.write(content)
    f.seek(0)
    return f
Example #16
 def test_run_command_stdin(self):
     connector = BaseCommandDBConnector()
     stdin = SpooledTemporaryFile()
     stdin.write(b'foo')
     stdin.seek(0)
     # Run
     stdout, stderr = connector.run_command('cat', stdin=stdin)
     self.assertEqual(stdout.read(), b'foo')
     self.assertFalse(stderr.read())
Example #17
class GoogleCloudFile(File):
    def __init__(self, name, mode, storage):
        self.name = name
        self.mime_type = mimetypes.guess_type(name)[0]
        self._mode = mode
        self._storage = storage
        self.blob = storage.bucket.get_blob(name)
        if not self.blob and 'w' in mode:
            self.blob = Blob(self.name, storage.bucket)
        self._file = None
        self._is_dirty = False

    @property
    def size(self):
        return self.blob.size

    def _get_file(self):
        if self._file is None:
            self._file = SpooledTemporaryFile(
                max_size=self._storage.max_memory_size,
                suffix=".GSStorageFile",
                dir=setting("FILE_UPLOAD_TEMP_DIR", None)
            )
            if 'r' in self._mode:
                self._is_dirty = False
                self.blob.download_to_file(self._file)
                self._file.seek(0)
        return self._file

    def _set_file(self, value):
        self._file = value

    file = property(_get_file, _set_file)

    def read(self, num_bytes=None):
        if 'r' not in self._mode:
            raise AttributeError("File was not opened in read mode.")

        if num_bytes is None:
            num_bytes = -1

        return super(GoogleCloudFile, self).read(num_bytes)

    def write(self, content):
        if 'w' not in self._mode:
            raise AttributeError("File was not opened in write mode.")
        self._is_dirty = True
        return super(GoogleCloudFile, self).write(force_bytes(content))

    def close(self):
        if self._file is not None:
            if self._is_dirty:
                self.file.seek(0)
                self.blob.upload_from_file(self.file, content_type=self.mime_type)
            self._file.close()
            self._file = None
Example #18
 def read_file_handle(self, filename):
     """Get the file data, put it in a SpooledTemporaryFile object for return
     and reading
     """
     logger.debug("read_file_handle('%s')" % filename)
     read_buffer = SpooledTemporaryFile()
     response = self.rc.fs.read_file(read_buffer, filename)
     logger.debug(response)
     read_buffer.seek(0)
     return read_buffer
Example #19
 def close(self):
     """On close, seek to 0 and write the data via the API, then close()
     for realz
     """
     logger.debug("close() called on WriteBuffer")
     self.seek(0)
     logger.debug("Attempting to create file at dir_path %s with name %s" %
                  (self.path, self.filename))
     self.fs.rc.fs.write_file(self, self.fullpath)
     SpooledTemporaryFile.close(self)  # old-style class!
Example #20
def filter_file(filter, filename, membuffer=10485760):
    tmp = SpooledTemporaryFile(max_size=membuffer)
    with open(filename) as input:
        for line in input:
            if filter(line):
                tmp.write(line)
    tmp.seek(0)
    with open(filename, "w") as output:
        for line in tmp:
            output.write(line)
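
For instance, this could strip comment lines from a config file in place (the filename is hypothetical):

filter_file(lambda line: not line.startswith('#'), 'settings.conf')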
Example #21
    def run_command(self, command, stdin=None, env=None):
        """
        Launch a shell command line.

        :param command: Command line to launch
        :type command: str
        :param stdin: Standard input of command
        :type stdin: file
        :param env: Environment variable used in command
        :type env: dict
        :return: Standard output of command
        :rtype: file
        """
        cmd = shlex.split(command)
        stdout = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        stderr = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        full_env = self.env.copy()
        full_env.update(env or {})
        try:
            process = Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr,
                            env=full_env)
            process.wait()
            if process.poll():
                stderr.seek(0)
                raise exceptions.CommandConnectorError(
                    "Error running: {}\n{}".format(command, stderr.read()))
            stdout.seek(0)
            stderr.seek(0)
            return stdout, stderr
        except OSError as err:
            raise exceptions.CommandConnectorError(
                "Error running: {}\n{}".format(command, str(err)))
Example #22
 def __init__(self, path, filename, fs, max_size=WRITE_BUFFER_SIZE):
     """We need the path so we can write the buffered file to the API"""
     SpooledTemporaryFile.__init__(self, max_size=max_size)  # old-style!
     self.path = path
     self.filename = filename
     self.fs = fs
     self.fullpath = ''
     try:
         self.fullpath = self.create_file()
     except RequestError as e:
         SpooledTemporaryFile.close(self)
         raise FilesystemError(str(e))
Example #23
    def upload_file(self, user, stream, expected_size, filename, force_coll_name=''):
        """Upload WARC archive.

        :param User user: user
        :param stream: file object
        :param int expected_size: expected WARC archive size
        :param str filename: WARC archive filename
        :param str force_coll_name: name of collection to upload into

        :returns: upload information
        :rtype: dict
        """
        temp_file = None
        logger.debug('Upload Begin')

        logger.debug('Expected Size: ' + str(expected_size))

        #is_anon = False

        size_rem = user.get_size_remaining()

        logger.debug('User Size Rem: ' + str(size_rem))

        if size_rem < expected_size:
            return {'error': 'out_of_space'}

        if force_coll_name and not user.has_collection(force_coll_name):
            #if is_anon:
            #    user.create_collection(force_coll, 'Temporary Collection')

            #else:
            #status = 'Collection {0} not found'.format(force_coll_name)
            return {'error': 'no_such_collection'}

        temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

        stream = CacheingLimitReader(stream, expected_size, temp_file)

        if filename.endswith('.har'):
            stream, expected_size = self.har2warc(filename, stream)
            temp_file.close()
            temp_file = stream

        infos = self.parse_uploaded(stream, expected_size)

        total_size = temp_file.tell()
        if total_size != expected_size:
            return {'error': 'incomplete_upload', 'expected': expected_size, 'actual': total_size}

        upload_id, upload_key = self._init_upload_status(user, total_size, 1, filename=filename)

        return self.handle_upload(temp_file, upload_id, upload_key, infos, filename,
                                  user, force_coll_name, total_size)
Example #24
    def get_empty(self, opacity=OPAQUE):
        color = self.colors[255,:]
        #color[3] = int(color[3]*float(opacity)/255)
        color[3] = 0

        empty = np.tile(color, SIZE*SIZE).reshape(SIZE, SIZE*4)

        tmpfile = SpooledTemporaryFile()
        writer = png.Writer(SIZE, SIZE, alpha=True, bitdepth=8)
        writer.write(tmpfile, empty)
        tmpfile.seek(0)
        return tmpfile
Example #25
class DropBoxFile(File):
    def __init__(self, name, storage):
        self.name = name
        self._storage = storage

    @property
    def file(self):
        if not hasattr(self, '_file'):
            response = self._storage.client.get_file(self.name)
            self._file = SpooledTemporaryFile()
            copyfileobj(response, self._file)
            self._file.seek(0)
        return self._file
Example #26
    def generate(self):
        points = self.points()
        
        # Grab a new PIL image canvas
        img = Image.new('RGBA', self.expanded_size, 'white')
        
        # Render the B&W density version of the heatmap
        for x,y in points:
            dot_placed = Image.new('RGBA', self.expanded_size, 'white')
            dot_placed.paste(self.dot, (x, y))
            img = ImageChops.multiply(img, dot_placed)

        # Crop resulting density image (which could have grown) into the
        # actual canvas size we want
        img = img.crop((self.pad, self.pad, SIZE+self.pad, SIZE+self.pad))
        img = ImageChops.duplicate(img) # converts ImageCrop => Image

        # Given the B&W density image, generate a color heatmap based on
        # this Tile's color scheme.
        _computed_opacities = dict()
        pix = img.load() # Image => PixelAccess
        for x in range(SIZE):
            for y in range(SIZE):

                # Get color for this intensity
                # ============================
                # pix[x,y][0] is an intensity value in the 0-255 range
                val = self.schemeobj.colors[0, pix[x,y][0]]
                try:
                    pix_alpha = val[3] # the color image has transparency
                except IndexError:
                    pix_alpha = OPAQUE # it doesn't
                

                # Blend the opacities
                # ===================

                conf, pixel = self.opacity, pix_alpha
                if (conf, pixel) not in _computed_opacities:
                    opacity = int(( (conf/255.0)    # from configuration
                                  * (pixel/255.0)   # from per-pixel alpha
                                   ) * 255)
                    _computed_opacities[(conf, pixel)] = opacity
                
                pix[x,y] = val[:3] + (_computed_opacities[(conf, pixel)],)
        
        tmpfile = SpooledTemporaryFile()
        img.save(tmpfile, 'PNG')
        tmpfile.seek(0)
        
        return tmpfile
Example #27
    def generate_thumbnail(self, content):
        content = file_from_content(content)
        uploaded_image = Image.open(content)
        if max(uploaded_image.size) >= self.max_size:
            uploaded_image.thumbnail((self.max_size, self.max_size), Image.BILINEAR)
            content = SpooledTemporaryFile(INMEMORY_FILESIZE)
            uploaded_image.save(content, uploaded_image.format)

        content.seek(0)

        thumbnail = uploaded_image.copy()
        thumbnail.thumbnail(self.thumbnail_size, Image.ANTIALIAS)
        thumbnail = thumbnail.convert('RGBA')
        thumbnail.format = self.thumbnail_format

        output = SpooledTemporaryFile(INMEMORY_FILESIZE)
        thumbnail.save(output, self.thumbnail_format)
        output.seek(0)

        thumb_path, thumb_id = self.store_content(output,
                                                  'thumb.%s' % self.thumbnail_format.lower())
        self['thumb_id'] = thumb_id
        self['thumb_path'] = thumb_path

        thumbnail_file = self.thumb_file
        self['_thumb_public_url'] = thumbnail_file.public_url
        content.close()
Example #28
    def _buffer_response(status_headers, iterator):
        out = SpooledTemporaryFile(ProxyRouter.BUFF_RESPONSE_MEM_SIZE)
        size = 0

        for buff in iterator:
            size += len(buff)
            out.write(buff)

        content_length_str = str(size)
        # remove existing content length
        status_headers.replace_header('Content-Length',
                                      content_length_str)

        out.seek(0)
        return RewriteContent.stream_to_gen(out)
Example #29
    def __init__(self, filepath, mode='rtc'):
        """filepath: the path of the distant file
           mode: take the same argument as mode argument of the global
                 open() + optional flag c (which mean store in cache).
        """

        self.mode = mode
        self.filepath = filepath
        host, port = utils.get_host_port(_config['nameserver'])
        self.srv = utils.get_server(filepath, host, port)

        if self.srv is None:
            raise DFSIOError('Impossible to find a server that serve %s.'
                    % filepath)

        self.last_modified = None
        SpooledTemporaryFile.__init__(self, _config['max_size'], mode.replace('c', ''))

        host, port = utils.get_host_port(_config['lockserver'])
        if utils.is_locked(filepath, host, port):
            raise DFSIOError('The file %s is locked.' % filepath)

        if 'w' not in mode:
            host, port = utils.get_host_port(self.srv)
            with closing(HTTPConnection(host, port)) as con:
                con.request('GET', filepath)
                response = con.getresponse()
                self.last_modified = response.getheader('Last-Modified')
                status = response.status

                if status not in (200, 204):
                    raise DFSIOError('Error (%d) while opening file.' % status)

                if status != 204:
                    self.write(response.read())

                if 'r' in mode:
                    self.seek(0)

                self.lock_id = None

        if 'a' in mode or 'w' in mode:
            # automatically gets a lock if we're in write/append mode
            host, port = utils.get_host_port(_config['lockserver'])
            self.lock_id = int(utils.get_lock(filepath, host, port))

        if 'c' in mode:
            File._cache[filepath] = self
Example #30
def file_from_content(content):
    """Provides a real file object from file content

    Converts ``FileStorage``, ``FileIntent`` and
    ``bytes`` to an actual file.
    """
    f = content
    if isinstance(content, cgi.FieldStorage):
        f = content.file
    elif isinstance(content, FileIntent):
        f = content._fileobj
    elif isinstance(content, byte_string):
        f = SpooledTemporaryFile(INMEMORY_FILESIZE)
        f.write(content)
    f.seek(0)
    return f
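
A quick usage sketch: raw bytes come back as a seekable spooled file, while an existing file-like object is simply rewound:

f = file_from_content(b'hello world')  # bytes -> SpooledTemporaryFile
assert f.read() == b'hello world'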
Example #31
 def _har2warc_temp_file(self):
     return SpooledTemporaryFile(max_size=BLOCK_SIZE)
Example #32
    def insert_extracts(self, user, payload):
        """Insert multiple extracts into the database, as a single request

        :param user:
        :param payload:
        :return:
        """
        conn = g._db
        db_name = self._config.get("MySql", "db")
        env = self._config.get("api", "env")
        cor_admin = user.get("digs")

        # self._logger.error("oopies")

        user_info = payload.get("user")
        samples = payload.get("samples")
        facility = payload.get("facility")
        manifesto = payload.get("manifesto")
        institution = user.get("ldap", {}).get("actor_institution")
        request_uid = str(uuid4())

        # create array of choices to insert into database
        # this is only for tracking the options presented purposes (possible debugging),
        # since only facility.get("choice") is critical information
        fse_choices = facility.get("digs")
        fse = []
        for key in fse_choices:
            try:
                rank = int(key)
            except (TypeError, ValueError):
                pass
            else:
                fse.append(fse_choices[str(rank)])

        request_id = None
        emailed = None

        # Create request
        query = """INSERT INTO `{}`.`Requests` (`uid`, `digs_id`, `fse`, `requester`, `institution`, `created` )
                  VALUES (\"{}\", (SELECT id from `{}`.`Digs` WHERE digs_core_number LIKE \"{}\"), \'{}\', "{}", \"{}\", NOW())""" \
            .format(db_name, request_uid, db_name, facility.get("choice"), json.dumps(fse),
                    user.get("ldap", {}).get("actor_username"), institution)

        with conn.cursor() as cursor:
            try:
                cursor.execute(query)
                conn.commit()
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                raise ApiSqlException(title=str(e), description=str(e),
                                      logger=self._logger, config=self._config, stacktrace=exc_value)
            else:
                query = """SELECT id from `{}`.`Requests` WHERE `uid` LIKE '{}'""".format(db_name, request_uid)
                try:
                    cursor.execute(query)
                    columns = [field[0] for field in cursor.description]
                    res = cursor.fetchone()
                    if res:
                        request = dict(zip(columns, [res[0]]))
                except Exception as e:
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    raise ApiSqlException(title=str(e), description=str(e),
                                          logger=self._logger, config=self._config, stacktrace=exc_value)
                else:
                    request_id = request.get("id")

            selected_facility = None
            query = """SELECT * FROM `{}`.`Digs` WHERE `digs_core_number` LIKE '{}'""".format(db_name, facility.get("choice"))
            try:
                cursor.execute(query)
                columns = [field[0] for field in cursor.description]
                res = cursor.fetchone()
                if res:
                    selected_facility = dict(zip(columns, res))
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                raise ApiSqlException(title=str(e), description=str(e),
                                      logger=self._logger, config=self._config, stacktrace=exc_value)

            csv_manifest = []
            res_manifesto = []
            if request_id:
                csv_manifest.append('DIGS Sequencing Request Submission ID: R{}\n\n'.format(request_id))
                # requester info block
                csv_manifest.append('Requester Contact Information\n')
                csv_manifest.append('Name,{}\n'.format(user_info.get("name")))
                csv_manifest.append('Address,"{}, {} {} {}"\n'.format(
                    user_info.get("street_address"),
                    user_info.get("city"),
                    user_info.get("state_province") if user_info.get("state_province") != "N/A" else "",
                    selected_facility.get("zipcode")))
                csv_manifest.append('Phone,{}\n'.format(user_info.get("daytime_phone")))
                csv_manifest.append('Email,{}\n\n'.format(user_info.get("email")))
                # facility info block
                csv_manifest.append('DIGS Facility Contact Information\n')
                csv_manifest.append('Name,{}\n'.format(selected_facility.get("contact_name")))
                csv_manifest.append('Address,"{}, {} {} {}"\n'.format(
                    selected_facility.get("shipping_address_street"),
                    selected_facility.get("shipping_address_city"),
                    selected_facility.get("shipping_address_state") if selected_facility.get("shipping_address_state") \
                                                                       != "N/A" else "",
                    selected_facility.get("shipping_address_zip")))
                csv_manifest.append('Phone,{}\n'.format(selected_facility.get("contact_phone")))
                csv_manifest.append('Email,{}\n\n'.format(selected_facility.get("contact_email")))

                # sample table block
                csv_manifest.append("Sample Identifier,Extract Identifier,Sequencing Study Identifier,Submission ID,"
                                    "Submission Type,Submitter Name,Submission Date,Project Identifier,"
                                    "Contributing Institution,Virus Identifier,Strain Name,Influenza Subtype,"
                                    "Host Species,Lab Host,Passage History,Pathogenicity,"
                                    "Extract Material,Volume (µl),Concentration (ng/µl),Concentration Determined By,"
                                    "Sequencing Technology,Analysis Type,Raw Sequences,Comments\n")

                request_contains_rna_sample = False

                for row, sample in enumerate(samples, 12):
                    if sample.get("extract_material") == "Viral RNA":
                        request_contains_rna_sample = True

                    extract_id = "R{}_{}".format(request_id, sample["extract_id"])
                    query = """INSERT INTO `{}`.`Extracts` (`request_id`, `sample_id`, `extract_id`,
                                    `sequencing_study_identifier`, `submission_id`, `submission_type`, `submitter_name`,
                                    `submission_date`, `project_identifier`, `virus_identifier`, `influenza_subtype`,
                                    `host_species`, `lab_host`, `passage_history`, `pathogenicity`, `extract_material`,
                                    `volume`, `concentration`, `concentration_determined_by`, `sequencing_tecnhology`,
                                     `analysis_type`, `raw_sequences`, `comments`, `status`, `created`,
                                     `sample_identifier`)
                                VALUES( {}, '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}',
                                    '{}', '{}', '{}', {}, {}, '{}', '{}', '{}','{}', '{}', 'Requested',
                                    CURRENT_TIMESTAMP, '{}' )""". \
                        format(db_name,
                               request_id,
                               sample.get("sample_id"),
                               extract_id,
                               sample.get("sequencing_study_identifier"),
                               sample.get("submission_id"),
                               sample.get("submission_type"), sample.get("submitter_name"),
                               sample.get("submission_date"),
                               sample.get("project_identifier"),
                               sample.get("virus_identifier"),
                               sample.get("influenza_subtype"),
                               sample.get("host_species"), sample.get("lab_host"),
                               sample.get("passage_history"),
                               sample.get("pathogenicity"),
                               sample.get("extract_material"),
                               sample.get("volume"), sample.get("concentration"),
                               sample.get("concentration_determined_by"),
                               json.dumps(sample.get("sequencing_technology")),
                               json.dumps(sample.get("analysis_type")),
                               sample.get("raw_sequences", '0'),
                               sample.get("comments"),
                               sample.get("sample_identifier"))

                    try:
                        res = cursor.execute(query)
                        if res:
                            conn.commit()
                    except Exception as e:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        raise ApiSqlException(title=str(e), description=str(e),
                                              logger=self._logger, config=self._config, stacktrace=exc_value)
                    else:
                        analysis_type = " / ".join(
                            [analysis for analysis in sample["analysis_type"] if sample["analysis_type"][analysis]])
                        sequencing_technology = " or ".join(
                            [tech for tech in sample["sequencing_technology"] if sample["sequencing_technology"][tech]])
                        csv_manifest.append("\"{}\",\"{}\",\"{}\",\"{}\t\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\","
                                            "\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\","
                                            "\"{}\",\"{}\",\"{}\",\"{}\"\n".format(
                            sample.get("sample_identifier"), extract_id,
                            sample.get("sequencing_study_identifier"), sample.get("submission_id", ""),
                            sample.get("submission_type", ""), sample.get("submitter_name", ""),
                            sample.get("submission_date", ""), sample.get("project_identifier"),
                            sample.get("contributing_institution", ""), sample.get("virus_identifier"),
                            sample.get("strain_name"), sample.get("influenza_subtype"),
                            sample.get("host_species"), sample.get("lab_host"), sample.get("passage_history"),
                            sample.get("pathogenicity"), sample.get("extract_material"),
                            sample.get("volume"), sample.get("concentration"),
                            sample.get("concentration_determined_by"), sequencing_technology, analysis_type,
                            sample.get("raw_sequences", "N"), sample.get("comments", "")))
                        res_manifesto.append([sample.get("sample_identifier"), sample.get("extract_id"),
                            sample.get("sequencing_study_identifier"), sample.get("submission_id", ""),
                            sample.get("submission_type", ""), sample.get("submitter_name", ""),
                            sample.get("submission_date", ""), sample.get("project_identifier"),
                            sample.get("contributing_institution", ""), sample.get("virus_identifier"),
                            sample.get("strain_name"), sample.get("influenza_subtype"), sample.get("host_species"),
                            sample.get("lab_host"), sample.get("passage_history"), sample.get("pathogenicity"),
                            sample.get("extract_material"), sample.get("volume"), sample.get("concentration"),
                            sample.get("concentration_determined_by"), sequencing_technology, analysis_type,
                            sample.get("raw_sequences", "N"), sample.get("comments", "")])

                # wb.save("manifest.xls")
                with SpooledTemporaryFile() as fh:
                    # writer = csv.writer(fh, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
                    fh.writelines([x.encode('utf-8') for x in csv_manifest])
                    fh.seek(0)
                # with open("swt/scripts/request_{}.{}".format(request_id, "csv"), 'w') as file:
                #     for line in csv_manifest:
                #         file.write(line)

                    aws_configuration = {
                        'aws_access_key_id': self._config.get('aws', 'aws_access_key_id'),
                        'aws_secret_access_key': self._config.get('aws', 'aws_secret_access_key'),
                        'region_name': self._config.get('aws', 'aws_region')
                    }

                    _s3 = S3(credentials=aws_configuration, logger=self._logger)

                    filename = "request_{}.{}".format(request_id, "csv")
                    bucket = self._config.get("aws", "s3_bucket")
                    # bucket = "swt-prod"
                    key = "manifest-files/" + filename

                    pre_signed_url = _s3.upload(bucket, key, fh)

                    if not pre_signed_url:
                        return {"status": False,
                                "statusText": "Error uploading manifest to aws"}

                    rna_warning_msg = """<br><p><u>Notice</u>: This Sequencing Request includes RNA-based samples.
                        Please handle accordingly when shipping your samples.</p>""" \
                        if request_contains_rna_sample else ""

                    conn = boto.ses.connect_to_region(**aws_configuration)
                    email_from = '*****@*****.**'

                    user_email = user_info.get("email")
                    facility_email = user_email

                    if env == "prod":
                        facility_email = facility.get("digs", {}).get("contact_info", {})\
                            .get(facility.get("choice")).get("email")


                    email_body_facility = EXTRACT_EMAIL_FACILITY_BODY.format(request_id,
                                                                             user_info.get("name"),
                                                                             "{}, {} {} {}".format(
                                                                                 user_info.get("street_address"),
                                                                                 user_info.get("city"),
                                                                                user_info.get("state_province") \
                                                                                    if user_info.get("state_province") \
                                                                                       != "N/A" else "",
                                                                                 user_info.get("zipcode")),
                                                                             user_info.get("daytime_phone"),
                                                                             user_info.get("email"),
                                                                             EXTRACT_EMAIL_SIGNATURE,
                                                                             rna_warning_msg)
                    emailMsg = MIMEMultipart()
                    emailMsg['Subject'] = EXTRACT_EMAIL_FACILITY_SUBJECT
                    emailMsg['From'] = email_from
                    emailMsg['To'] = facility_email
                    emailMsg.preamble = 'Multipart message.\n'
                    part = MIMEText(email_body_facility, 'html')
                    emailMsg.attach(part)
                    part = MIMEBase('application', 'octet-stream')
                    fh.seek(0)
                    part.set_payload(fh.read())
                    encode_base64(part)
                    part.add_header('Content-Disposition', 'attachment; filename="shipping_manifest.csv"')
                    emailMsg.attach(part)
                    emailed_facility = conn.send_raw_email(emailMsg.as_string(),
                                                           source=emailMsg['From'], destinations=[emailMsg['To']])

                    email_body_requester = EXTRACT_EMAIL_REQUESTER_BODY.format(user_info.get("name"),
                                                                               request_id,
                                                                               pre_signed_url,
                                                                               selected_facility.get(
                                                                                   "digs_core_name"),
                                                                               selected_facility.get(
                                                                                   "digs_core_name"),
                                                                               EXTRACT_EMAIL_SIGNATURE,
                                                                               rna_warning_msg)
                    emailed_requester = conn.send_email(email_from, EXTRACT_EMAIL_REQUESTER_SUBJECT,
                                                        None, to_addresses=user_info.get("email"),
                                                        format="html", html_body=email_body_requester)
                    email_failed = not emailed_facility or not emailed_requester

                    if email_failed:
                        raise ApiSqlException(
                            title="Extracts have been saved, but there was a problem emailing the DIGS facility. Please contact the DPCC",
                            description="Extracts have been saved, but there was a problem emailing the DIGS facility. Please contact the DPCC",
                            logger=self._logger, config=self._config)
                    return {"status": True, "request_id": request_id, "manifesto": res_manifesto}
Example #33
    def download_file(self, key, path='', temporary=False, on_memory=False, retries=2, return_path=False):
        """Download a file from S3, retrying if the download fails.
        Optionally, the file could be downloaded as a temporary file,
        returning a file handler.

        Input:
            key, the S3 key object to download
            path, folder to store the file
            temporary, indicates if the file should be downloaded as a
                       temporary file. With this, the function returns
                       a file handler
            on_memory, if the 'temporary' parameter is True, and this
                       is True, the temporary file is created in memory,
                       using an instance of  tempfile.SpooledTemporaryFile
                       class
            retries, number of retries if the download fails

        Output:
            The path where the file was downloaded, or a file handler if temporary is True

        """

        k = self.get_key(key)

        # Creates the path if it not exists
        if not on_memory and path and not os.path.isdir(path):
            os.makedirs(path)

        fails = 0

        for retry in xrange(retries):
            try:
                logger.info('Downloading {0}/{1} to {2}'.format(self.s3_bucket_url, key, path))

                # When using regular files, it's not necessary to return a file handler
                tmp_file_handler = None

                if temporary:
                    log_msg = "Using a temporary file"

                    # This is a hack: gzip.GzipFile only accepts reading from files
                    # opened in 'r' or 'rb' mode. Only the first letter of the mode
                    # is checked, so opening the file in 'r+b' mode allows both
                    # reading and writing the temporary file without problems
                    if re.search(r'\.gz$', key):
                        file_mode = 'r+b'
                    else:
                        # In other cases, use the default mode for NamedTemporaryFile
                        file_mode = 'w+b'

                    if on_memory:
                        log_msg += " [on memory]"
                        tmp_file_handler = SpooledTemporaryFile(mode=file_mode, max_size=MAX_TEMP_FILE_SIZE)
                    else:
                        log_msg += " [on disk]"
                        tmp_file_handler = NamedTemporaryFile(mode=file_mode, dir=path)

                    log_msg += " [MODE: {0}]"
                    logger.debug(log_msg.format(file_mode))

                    k.get_contents_to_file(tmp_file_handler)

                    # This is necessary, Boto doesn't do it
                    tmp_file_handler.seek(0)
                else:
                    logger.debug("Using a regular file")

                    _, filename = os.path.split(key)

                    if path:
                        final_path = os.path.join(path, filename)
                    else:
                        final_path = filename

                    k.get_contents_to_filename(final_path)

                logger.info('File {0} was successfully downloaded'.format(key))

                if temporary:
                    return tmp_file_handler
                elif return_path:
                    return final_path

            except Exception:
                log_msg = 'Download failed for {0}/{1}...'.format(self.s3_bucket_url, key)

                fails += 1

                if fails >= retries:
                    log_msg += ' Not retrying (Failed retries: {0})'.format(retry)
                    logger.error(log_msg)
                    raise
                else:
                    log_msg += ' Retrying'
                    logger.info(log_msg)
Example #34
    def fetch_pack_from_origin(
        self,
        origin_url: str,
        base_repo: RepoRepresentation,
        do_activity: Callable[[bytes], None],
    ) -> FetchPackReturn:
        """Fetch a pack from the origin"""

        pack_buffer = SpooledTemporaryFile(max_size=self.temp_file_cutoff)
        transport_url = origin_url

        logger.debug("Transport url to communicate with server: %s", transport_url)

        client, path = dulwich.client.get_transport_and_path(
            transport_url, thin_packs=False
        )

        logger.debug("Client %s to fetch pack at %s", client, path)

        size_limit = self.pack_size_bytes

        def do_pack(data: bytes) -> None:
            cur_size = pack_buffer.tell()
            would_write = len(data)
            if cur_size + would_write > size_limit:
                raise IOError(
                    f"Pack file too big for repository {origin_url}, "
                    f"limit is {size_limit} bytes, current size is {cur_size}, "
                    f"would write {would_write}"
                )

            pack_buffer.write(data)

        pack_result = client.fetch_pack(
            path,
            base_repo.determine_wants,
            base_repo.graph_walker(),
            do_pack,
            progress=do_activity,
        )

        remote_refs = pack_result.refs or {}
        symbolic_refs = pack_result.symrefs or {}

        pack_buffer.flush()
        pack_size = pack_buffer.tell()
        pack_buffer.seek(0)

        logger.debug("fetched_pack_size=%s", pack_size)

        # check if the repository only supports the git dumb transfer protocol;
        # the fetched pack file will be empty in that case, as dulwich does
        # not support it and does not fetch any refs
        self.dumb = transport_url.startswith("http") and getattr(client, "dumb", False)

        return FetchPackReturn(
            remote_refs=utils.filter_refs(remote_refs),
            symbolic_refs=utils.filter_refs(symbolic_refs),
            pack_buffer=pack_buffer,
            pack_size=pack_size,
        )
Example #35
 def _write_tempfile(self, writer_callable):
     fh = SpooledTemporaryFile()
     writer_callable(fh)
     fh.seek(0)
     return fh
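
Possible usage, with any callable that writes to a file object (obj stands in for whatever instance defines the method):

fh = obj._write_tempfile(lambda f: f.write(b'payload'))
assert fh.read() == b'payload'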
Example #36
 def create_zipfile_storage(self):
     spooled = SpooledTemporaryFile()
     spooled.write(self.zipfile_content)
     zip_file_storage = FileStorage(spooled)
     zip_file_storage.filename = DOWNLOAD_FILE.rpartition(os.path.sep)[-1]
     return zip_file_storage
Example #37
def _temporary_file():
    return SpooledTemporaryFile(max_size=1024 * 1024 * 10)  # 10 MB.
Example #38
 def __init__(self, s3key, max_mem=0):
     self.key = s3key
     self.max_mem = max_mem
     self._stream = SpooledTemporaryFile(max_size=self.max_mem)
     self.key.get_contents_to_file(self._stream)
     self._stream.seek(0)
Example #39
    def run_command(self, command, stdin=None, env=None):
        """
        Launch a shell command line.

        :param command: Command line to launch
        :type command: str
        :param stdin: Standard input of command
        :type stdin: file
        :param env: Environment variable used in command
        :type env: dict
        :return: Standard output of command
        :rtype: file
        """
        cmd = shlex.split(command)
        stdout = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        stderr = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        full_env = os.environ.copy() if self.use_parent_env else {}
        full_env.update(self.env)
        full_env.update(env or {})
        try:
            if isinstance(stdin, File):
                process = Popen(
                    cmd, stdin=stdin.open("rb"), stdout=stdout, stderr=stderr,
                    env=full_env
                )
            else:
                process = Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr, env=full_env)
            process.wait()
            if process.poll():
                stderr.seek(0)
                raise exceptions.CommandConnectorError(
                    "Error running: {}\n{}".format(command, stderr.read().decode('utf-8')))
            stdout.seek(0)
            stderr.seek(0)
            return stdout, stderr
        except OSError as err:
            raise exceptions.CommandConnectorError(
                "Error running: {}\n{}".format(command, str(err)))
Example #40
 def _create_buffer(self):
     return SpooledTemporaryFile(BUFF_SIZE)
Example #41
class RecvWindow(object):
    """
    Receiver windows receive data on a socket to fill their
    buffer.

    A window instance has the following attributes:

        name        Name of the window. The name of all windows is shown by
                    '> win show' in command line interface. It's usually the
                    name of the plugin appended by the arguments passed for
                    execution (e.g. ps.plg -A -t curesec.com)
        status      indicates the state of the plugin (e.g. RUNNING or FINISHED)
        req_t       request tuple that triggered plugin execution. It's used to
                    verify incoming packets
        plg_id      id of the plugin
        rid         request id of the packet triggering execution
        wid         the id of the window. Derived from a class variable which
                    is increased after window creation. The window id is not
                    fixed. So every new client session, the windows might have
                    a new id.
        session     the actual client session
        is_used     flag, that indicates, whether the windows is used by user
                    (via '> win show <id>')

    """

    #: command to close window
    CMD_CLOSE = "close"

    #: window id, increased for every new window
    ID = 1

    def __init__(self, name, req_t, session):
        logger.debug("creating window %s", name)
        self.name = name
        self.status = RUNNING
        self.req_t = req_t
        self.sid = req_t[1]
        self.rid = req_t[2]
        self.plg_id = req_t[4]
        self.wid = RecvWindow.ID
        self.session = session
        self.is_used = False
        self.closed = False

        self._buffer = SpooledTemporaryFile(max_size=8096, mode="w+b")
        RecvWindow.ID += 1

    def close(self):
        self.closed = True
        print("closing window %d" % self.wid)

    def set_state(self, input_code):
        state = None
        try:
            state = STATE[input_code + 1]
        except (IndexError, TypeError) as e:
            logger.warning('Could not get window state for code %d: %s',
                           input_code, e)
            logger.exception(e)

        if not state:
            state = "Unknown"

        if self.is_used and state in (FINISHED, ERROR, KILLED):
            self.is_used = False

        code = "(%s)" % str(input_code)
        self.status = "{state} {code}".format(state=state, code=code)

        logger.debug("new window(id:%s) state: %s", self.wid, self.status)

    def _write_to_output(self, data):
        # if no more data is coming, set window state to
        # finished and break if we have a non-interactive
        # window (otherwise we need to wait for the user
        # to enter further input).
        #if not data:
        #    self.status = FINISHED
        # TODO: that seems a bit heuristic to me ...

        # ok, we have data, so verify 'em and print it. this is
        # done by unpack
        try:
            actual_output = self._unpack(data)
            if actual_output:
                sys.stdout.write(actual_output)
                sys.stdout.flush()
                self._buffer.write(actual_output)
                self._buffer.flush()

        except errors.ClientError as e:
            if e.request:
                logger.warning('Request: %s', conn.ccdlib.pkt2str(e.request))
            if e.response:
                logger.warning('Response: %s', conn.ccdlib.pkt2str(e.response))
            logger.debug("received invalid packet:'%s'", e)
            logger.exception(e)
        except Exception as e:
            logger.debug("received invalid packet:'%s'", e)
            logger.exception(e)

    def _unpack(self, resp_t):
        #if not resp_t[0] == cnnm.ccdlib.OP_SUCCESS:
        #    raise Exception("received packet that indicates a failure(%s)!",
        #                    hex(resp_t[0]))

        ## sid
        #if not resp_t[1] == self.req_t[1]:
        #    raise Exception("received packet with invalid session id(%s)!",
        #                    resp_t[1])

        ## rid
        #if not resp_t[2] == self.req_t[2]:
        #    raise Exception("received packet with invalid rid(%s)!",
        #                    resp_t[2])

        ## plg_id
        #if not resp_t[4] == self.req_t[4]:
        #    raise Exception("received packet with invalid plugin id(%s)!",
        #                    resp_t[4])

        # method indicates a plugin's state change.
        if resp_t[6] == conn.ccdlib.MTH_TERMINATE:
            try:
                state = resp_t[-1]["code"]
            except (KeyError, IndexError):
                raise errors.InvalidServerResponse(
                    message="No state in terminate response payload",
                    request=self.req_t,
                    response=resp_t)
            self.set_state(state)
            return

        elif resp_t[6] == conn.ccdlib.MTH_INPUT:
            return ""

        # anything other than plugin output is an invalid method here
        elif resp_t[6] != conn.ccdlib.MTH_OUTPUT:
            try:
                MTH_str = conn.ccdlib.rev_MTH_map[resp_t[6]]
            except Exception:
                MTH_str = repr(resp_t[6])
            raise errors.InvalidServerResponse(
                message=
                "Invalid method %s, expected MTH_OUTPUT, _TERMINATE or _INPUT"
                % MTH_str,
                request=self.req_t,
                response=resp_t)

        return resp_t[-1]

    def _pack(self, data):
        req_t = list(self.req_t)

        req_t[0] = conn.ccdlib.OP_PLUGIN  # indicate operation concerning plugin
        req_t[6] = conn.ccdlib.MTH_INPUT  # indicate plugin input is coming
        req_t[-1] = data  # payload

        # if no input to commit, raise EmptyException
        if not req_t[-1]:
            raise conn.ccdlib.EmptyException

        return tuple(req_t)

    def use(self):
        # to get the plugin's output, we need to register
        rid = self.req_t[2]
        plg_id = self.req_t[4]
        pld = comm.sendpacket(self.session,
                              op=conn.ccdlib.OP_REGISTER,
                              plg=plg_id,
                              pld=dict(rid=rid))[-1]
        interactive = pld["interactive"]
        logger.debug("successfully registered to plugin. have fun!")

        if interactive:
            print("Enter '%s' to leave interactive mode." %
                  RecvWindow.CMD_CLOSE)

        sock = self.session.sock

        # write content that is already buffered
        for line in self._buffer:
            sys.stdout.write(line)

        # start window interaction
        self.is_used = True
        while self.is_used:

            try:
                r, _, _ = select.select([sys.stdin, sock], [], [])
            except KeyboardInterrupt:
                print("catched keyboard interrupt. closing window.")
                self.is_used = False
                break

            # if there is something to print to window
            if sock in r:
                # read data that should be written to stdout;
                # the data is sent by the ccd
                resp_t = None
                try:
                    resp_t = conn.ccdlib.recv(sock)
                    #resp_t = comm.wait_for_response(self.session, self.req_t)
                except Exception as e:
                    logger.error("Exception while receiving data: %s", e)
                    logger.exception(e)
                    self.is_used = False
                    break

                if resp_t:
                    self._write_to_output(resp_t)

            # there is something to read from the window console
            if sys.stdin in r:
                if not interactive:
                    # any keystroke sends active plugin to background
                    self.is_used = False
                    break

                # read the user's input to be sent to the ccd plugin
                #data = sys.stdin.readline()
                data = ''
                try:
                    data = os.read(sys.stdin.fileno(), 1024)
                except Exception as e:
                    logger.error("Error reading from stdin: '%s'!", e)
                    logger.exception(e)
                    continue

                logger.debug('all read on stdin \'%s\'', data)

                # catch command to close the interactive window
                if data.rstrip() == RecvWindow.CMD_CLOSE:
                    self.is_used = False
                    break

                # the actual write of the user's input
                actual_packet = self._pack(data)

                try:
                    conn.ccdlib.send(sock, actual_packet)
                    #comm.wait_for_response(self.session, actual_packet)
                except Exception as e:
                    logger.error("Failed to send user input to plugin!:'%s'",
                                 e)
                    logger.exception(e)

        try:
            comm.sendpacket(self.session,
                            op=conn.ccdlib.OP_UNREGISTER,
                            plg=plg_id,
                            pld=dict(rid=rid))
        except KeyboardInterrupt:
            print("catched keyboard interrupt. closing window.")
        except Exception as e:
            logger.error("Failed to unregister properly:'%s'!", e)
            logger.exception(e)
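The use() loop above multiplexes between the user's terminal and the server socket with select. A minimal sketch of the pattern, with sock and the two handler functions as hypothetical stand-ins for the window's real session socket and handlers:

import os
import select
import sys

# block until the socket or stdin is readable, then service whichever fired
r, _, _ = select.select([sys.stdin, sock], [], [])
if sock in r:
    handle_incoming(sock)                                         # hypothetical
if sys.stdin in r:
    forward_user_input(sock, os.read(sys.stdin.fileno(), 1024))   # hypothetical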
Ejemplo n.º 42
0
 def __init__(self, threshold=10 * 1024 * 1024, **kw):
     # STF uses >, the old ATF used >= for the max_size check
     SpooledTemporaryFile.__init__(self, max_size=threshold - 1, **kw)
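The comment above points at a subtle threshold rule: SpooledTemporaryFile only rolls over to disk once its size strictly exceeds max_size, so subtracting one restores the old >= behaviour. A quick illustration (note that _rolled is a CPython implementation detail, used here only for inspection):

from tempfile import SpooledTemporaryFile

f = SpooledTemporaryFile(max_size=10)
f.write(b"0123456789")  # exactly max_size bytes: still held in memory
print(f._rolled)        # False
f.write(b"x")           # one byte over the threshold
print(f._rolled)        # True - data now lives in a real temp file on disk
f.close()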
Ejemplo n.º 43
0
 def compress(self, data: bytes):
     if self._buffer is None:
         self._buffer = SpooledTemporaryFile(mode='wb')
     self._buffer.write(data)
Ejemplo n.º 44
0
 def __init__(self, sep=b','):
     self._buffer = SpooledTemporaryFile(mode='wb')
     self._result = SpooledTemporaryFile(mode='wb')
     self._sep = sep
     self.eof = False
Ejemplo n.º 45
0
def get(url, ignoreVersion=False):
    '''Given a Send url, download and return the encrypted data and metadata'''
    prefix, urlid, key = splitkeyurl(url)

    if not checkServerVersion(prefix, ignoreVersion):
        raise Exception(
            'Potentially incompatible server version, use --ignore-version to disable version checks'
        )

    data = SpooledTemporaryFile(max_size=SPOOL_SIZE, mode='w+b')

    r = requests.get(prefix + 'api/download/' + urlid, stream=True)
    r.raise_for_status()
    content_length = int(r.headers['Content-length'])
    meta = json.loads(r.headers['X-File-Metadata'])
    filename = unquote_plus(meta['filename'])
    iv = meta['id']

    pbar = progbar(content_length)
    for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
        data.write(chunk)
        pbar.update(len(chunk))
    pbar.close()

    # The last 16 bytes / 128 bits of data is the GCM tag
    # https://www.w3.org/TR/WebCryptoAPI/#aes-gcm-operations :-
    # 7. Let ciphertext be equal to C | T, where '|' denotes concatenation.
    data.seek(-16, 2)
    tag = data.read()

    # now truncate the file to only contain encrypted data
    data.seek(-16, 2)
    data.truncate()

    data.seek(0)
    return data, filename, key, iv, tag
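The seek(-16, 2)/truncate() sequence above simply splits the downloaded blob into ciphertext and the trailing GCM tag in place. A self-contained sketch of the same manipulation on dummy bytes:

from tempfile import SpooledTemporaryFile

blob = b"DATA" + b"T" * 16   # pretend ciphertext followed by a 16-byte tag
data = SpooledTemporaryFile(max_size=1024, mode='w+b')
data.write(blob)

data.seek(-16, 2)            # whence=2 seeks relative to the end of file
tag = data.read()            # the trailing 16 bytes

data.seek(-16, 2)
data.truncate()              # drop the tag so only ciphertext remains
data.seek(0)
assert data.read() == b"DATA" and tag == b"T" * 16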
Ejemplo n.º 46
0
def reconstitute_vector(bytesblob):
    f = SpooledTemporaryFile(max_size=1000000000)
    f.write(bytesblob)
    f.seek(0)
    return scipyio.mmread(f)
Ejemplo n.º 47
0
    def fetch_media(self, url, partial_fetch=False):
        """Retrieves a given media object from a remote (HTTP) location
        and returns the content-type and a file-like object containing
        the media content.

        The file-like object is a temporary file that - depending on the
        size - lives in memory or on disk. Once the file is closed, the
        contents are removed from storage.

        :param url: the URL of the media asset.
        :type url: str.
        :param partial_fetch: determines whether the complete file should
            be fetched, or if only the first 2 MB should be retrieved.
            This feature is used to prevent complete retrieval of large
            a/v material.
        :type partial_fetch: bool.
        :returns: a tuple with the ``content-type``, ``content-length``
            and a file-like object containing the media content. The
            value of ``content-length`` will be ``None`` in case
            a partial fetch is requested and ``content-length`` is not
            returned by the remote server.
        """

        http_resp = self.http_session.get(url, stream=True, timeout=(60, 120))
        http_resp.raise_for_status()

        if not os.path.exists(TEMP_DIR_PATH):
            log.debug('Creating temp directory %s' % TEMP_DIR_PATH)
            os.makedirs(TEMP_DIR_PATH)

        # Create a temporary file to store the media item, write the file
        # to disk if it is larger than 1 MB.
        media_file = SpooledTemporaryFile(max_size=1024 * 1024,
                                          prefix='oad_m_',
                                          suffix='.tmp',
                                          dir=TEMP_DIR_PATH)

        # When a partial fetch is requested, request up to two MB
        partial_target_size = 1024 * 1024 * 2
        content_length = http_resp.headers.get('content-length')
        if content_length and int(content_length) < partial_target_size:
            partial_target_size = int(content_length)

        retrieved_bytes = 0
        for chunk in http_resp.iter_content(chunk_size=512 * 1024):
            if chunk:  # filter out keep-alive chunks
                media_file.write(chunk)
                retrieved_bytes += len(chunk)

            if partial_fetch and retrieved_bytes >= partial_target_size:
                break

        media_file.flush()
        log.debug('Fetched media item %s [%s/%s]' %
                  (url, retrieved_bytes, content_length))

        # If the server doesn't provide a content-length and this isn't
        # a partial fetch, determine the size by looking at the retrieved
        # content
        if not content_length and not partial_fetch:
            media_file.seek(0, 2)
            content_length = media_file.tell()

        return (http_resp.headers.get('content-type'), content_length,
                media_file)
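A hypothetical call that probes only the first couple of megabytes of a large asset; extractor stands in for whatever object provides fetch_media():

ctype, length, media_file = extractor.fetch_media(
    'https://example.org/big-video.mp4', partial_fetch=True)
print(ctype, length)   # length may be None if no content-length was sent
media_file.close()     # closing discards the spooled contents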
Ejemplo n.º 48
0
 def __enter__(self):
     self.tempfile = SpooledTemporaryFile()
     self.tempfile.write(self.inputstr)
     self.tempfile.seek(0)
     return self.tempfile
Ejemplo n.º 49
0
class VersionedFile(io.BufferedIOBase):
    def __init__(self,
                 manager,
                 filename,
                 mode=Perm.read,
                 requestor=Owner.ALL,
                 meta=None,
                 rev=None,
                 file_info=None,
                 **kwargs):
        io.BufferedIOBase.__init__(self)
        self.path = self.name = filename
        # manager.check_perm(self.path, owner=requestor, perm=mode)
        self.created = self.modified = None
        self.data = None
        self.meta = meta or {}
        self.mode = mode
        self._seekable = True
        self.length = 0
        self.bs = 8192
        self._cipher = None
        self.manager = manager
        self._file_info = file_info or manager.get_metadata_and_check_perm(
            filename, rev, mode=mode, owner=requestor)
        # self._file_info = manager.get_file_metadata(filename, rev, mode=mode)
        if self._file_info:
            self.update(self._file_info)

        if mode == Perm.read and not self._file_info:
            raise FileNotFoundError(self.path)
        elif mode == Perm.write:
            self.owner = requestor

        if kwargs:
            self.update(kwargs)
        self._pos = 0
        if mode == Perm.read:
            if self.data:
                self._curr_chunk = self.data
                self._curr_chunk_num = 0
            else:
                self._curr_chunk_num = None
                self._curr_chunk = None
        else:
            self._buf = SpooledTemporaryFile(
                max_size=getattr(self, 'buffer_threshold', 52428800))
            self.hash = None

    @property
    def is_dir(self):
        return self.content_type == u'application/x-directory'

    def do_hash(self, algo='sha256'):
        self.hash = algo

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        if exc:
            if self.readable():
                self.close()
            else:
                self._buf.close()
                self.mode = None
            import six
            six.reraise(exc_type, exc, tb)
        else:
            self.close()

    def close(self):
        if self.closed:
            return
        if self.writable():
            self._buf.seek(0, 2)
            length = self.length = self._buf.tell()
            self._buf.seek(0)

            hist_data = {
                u'meta': self.meta,
                u'owner': getattr(self, 'owner', None),
                u'length': length,
                u'hash': self.hash,
                u'created': self.created,
                u'modified': self.modified,
                u'file_info': self._file_info,
            }
            content_type = getattr(self, 'content_type', None)
            if not content_type:
                content_type = mimetypes.guess_type(self.path)[0]
            hist_data[u'content_type'] = content_type

            if getattr(self, 'force_rev', None) is not None:
                hist_data[u'rev'] = rev = self.force_rev
                hist_data[u'modified'] = self.created

            self.update(
                self.manager.save_file_data(self.path,
                                            hist_data,
                                            self._buf,
                                            cipher=self._cipher))

            self._buf.close()
            self._buf = None
        self.mode = None
        io.BufferedIOBase.close(self)

    # def __del__(self):
    #     self.close()

    def readable(self):
        return self.mode == Perm.read

    def writable(self):
        return self.mode == Perm.write

    def seekable(self):
        return self._seekable

    def tell(self):
        if self.readable():
            return self._pos
        else:
            return self._buf.tell()

    def seek(self, pos, whence=0):
        if self.mode == Perm.read:
            curpos = self._pos
            if whence == 0:
                abspos = pos
            elif whence == 1:
                abspos = curpos + pos
            elif whence == 2:
                abspos = self.length + pos
            self._pos = abspos
            return self._pos
        elif self.mode == Perm.write and self.seekable():
            return self._buf.seek(pos, whence)

    def read(self, size=-1):
        if self.mode != Perm.read:
            return
        elif self._pos == self.length:
            return b''
        buf = bytearray()
        if self._pos == 0 and size == -1:
            if self.data:
                self._pos = self.length
                return self.data
            else:
                # optimization for reading the whole file
                i = 0
                for chunk in self.manager.get_file_chunks(self._file_info,
                                                          cipher=self._cipher):
                    i += 1
                    buf.extend(chunk)
                self._pos = len(buf)
                return bytes(buf)

        length = size if size > 0 else self.length
        where, pos = divmod(self._pos, self.bs)

        if self._curr_chunk_num != where:
            self._curr_chunk = self.manager.get_file_chunk(self._file_info,
                                                           where,
                                                           cipher=self._cipher)
            self._curr_chunk_num = where
        buf += self._curr_chunk[pos:]
        while len(buf) < length:
            where += 1
            self._curr_chunk = self.manager.get_file_chunk(self._file_info,
                                                           where,
                                                           cipher=self._cipher)
            if self._curr_chunk is None:
                self._curr_chunk_num = None
                break
            buf.extend(self._curr_chunk)
            self._curr_chunk_num = where
        read = buf[:length]
        self._pos += len(read)
        return bytes(read)

    def readall(self):
        return self.read()

    def write(self, data):
        if not data:
            return
        if not self.writable():
            raise FileError()
        if isinstance(data, six.text_type):
            data = data.encode('utf8')

        wrote = len(data)

        self._buf.write(data)
        return wrote

    def update(self, kwargs):
        if kwargs:
            for k, v in kwargs.items():
                if k == 'modified' and self.mode == 'w':
                    continue
                if v is not None:
                    setattr(self, k, v)

    def set_encryption(self, password='', save_password=False):
        """
        Set the encryption password, optionally saving the password in the metadata
        """
        try:
            from nacl.secret import SecretBox
        except ImportError:
            SecretBox = None
        if SecretBox:
            password = hashlib.sha256(password.encode('utf8')).digest()
        else:
            password = hashlib.sha512(password.encode('utf8')).digest()[:56]
        if self.writable():
            assert self._cipher is None
            if SecretBox:
                method = u'nacl'
                self.meta[u'_encryption'] = {u'method': method}
            else:
                method = u'cfb'
                self.meta[u'_encryption'] = {
                    u'method': method,
                    u'iv': os.urandom(8),
                }
            if save_password:
                self.meta[u'_encryption'][u'key'] = password
        else:
            assert u'_encryption' in self.meta
            method = self.meta[u'_encryption'][u'method']
            password = self.meta[u'_encryption'].get(u'key', None) or password
        if method == u'nacl':
            c = SecretBox(password)
            self._cipher = {'encrypt': c.encrypt, 'decrypt': c.decrypt}
        else:
            import blowfish
            c = blowfish.Cipher(password)
            iv = self.meta[u'_encryption'][u'iv']
            self._cipher = {
                'encrypt': lambda chunk: b''.join(c.encrypt_cfb(chunk, iv)),
                'decrypt': lambda chunk: b''.join(c.decrypt_cfb(chunk, iv)),
            }
        if self.data:
            self._curr_chunk = self._cipher['decrypt'](self.data)
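The chunked read() above maps an absolute position to a (chunk, offset) pair with divmod. A worked example with the default 8192-byte block size:

bs = 8192
pos = 20000
where, offset = divmod(pos, bs)
# where == 2, offset == 3616: position 20000 lies 3616 bytes into chunk 2,
# so read() fetches chunk 2, slices from that offset, and keeps appending
# whole chunks until the requested number of bytes has been collected.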
Ejemplo n.º 50
0
    def do_execute(self, code, silent, store_history=True, user_expressions=None,
                   allow_stdin=False):
        """Execute user code."""
        if len(code.strip()) == 0:
            return {'status': 'ok', 'execution_count': self.execution_count,
                    'payload': [], 'user_expressions': {}}
        env = builtins.__xonsh_env__
        shell = builtins.__xonsh_shell__
        hist = builtins.__xonsh_history__
        enc = env.get('XONSH_ENCODING')
        out = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        err = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        try:
            with redirect_stdout(out), redirect_stderr(err), \
                 swap(builtins, '__xonsh_stdout_uncaptured__', out), \
                 swap(builtins, '__xonsh_stderr_uncaptured__', err), \
                 env.swap({'XONSH_STORE_STDOUT': False}):
                shell.default(code)
            interrupted = False
        except KeyboardInterrupt:
            interrupted = True

        if not silent:  # stdout response
            if out.tell() > 0:
                out.seek(0)
                self._respond_in_chunks('stdout', out.read())
            if err.tell() > 0:
                err.seek(0)
                self._respond_in_chunks('stderr', err.read())
            if hasattr(builtins, '_') and builtins._ is not None:
                # rely on sys.displayhook functionality
                self._respond_in_chunks('stdout', pformat(builtins._))
                builtins._ = None
            if hist is not None and len(hist) > 0 and out.tell() == 0 and err.tell() == 0:
                self._respond_in_chunks('stdout', hist.outs[-1])

        out.close()
        err.close()

        if interrupted:
            return {'status': 'abort', 'execution_count': self.execution_count}

        rtn = 0 if (hist is None or len(hist) == 0) else hist.rtns[-1]
        if rtn > 0:
            message = {'status': 'error', 'execution_count': self.execution_count,
                       'ename': '', 'evalue': str(rtn), 'traceback': []}
        else:
            message = {'status': 'ok', 'execution_count': self.execution_count,
                       'payload': [], 'user_expressions': {}}
        return message
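Stripped of the xonsh machinery, the capture scheme reduces to redirecting stdout into a text-mode spooled file and rewinding it afterwards. A standalone sketch with an assumed 1 MB threshold:

from contextlib import redirect_stdout
from tempfile import SpooledTemporaryFile

out = SpooledTemporaryFile(max_size=1 << 20, mode='w+t',
                           encoding='utf-8', newline='\n')
with redirect_stdout(out):
    print('hello from the captured region')

if out.tell() > 0:       # anything written?
    out.seek(0)
    captured = out.read()
out.close()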
Ejemplo n.º 51
0
 def create_zipfile_storage(self):
     spooled = SpooledTemporaryFile()
     spooled.write(self.zipfile_file.read())
     zip_file_storage = FileStorage(spooled)
     zip_file_storage.filename = self.ZIP_NAME
     return zip_file_storage
Ejemplo n.º 52
0
def runTest(test, testnum):
    # test is a tuple of ( filename , usedb<bool> )
    # filename should be a js file to run
    # usedb is true if the test expects a mongod to be running

    (path, usedb) = test
    (ignore, ext) = os.path.splitext(path)
    # the dbtests know how to format themselves nicely, so we'll detect if
    # we're running them and, if so, won't mess with the output
    is_test_binary = False
    if skipTest(path):
        if quiet:
            sys.stdout.write("skip %d %s\n" %
                             (testnum, os.path.basename(path)))
            sys.stdout.flush()
        else:
            print "skipping " + path
        return
    if file_of_commands_mode:
        # smoke.py was invoked like "--mode files --from-file foo",
        # so don't try to interpret the test path too much
        if os.sys.platform == "win32":
            argv = [path]
        else:
            argv = shlex.split(path)
        path = argv[0]
        # if the command is a python script, use the script name
        if os.path.basename(path) in ('python', 'python.exe'):
            path = argv[1]
    elif ext == ".js":
        argv = [
            shell_executable, "--port", mongod_port,
            '--authenticationMechanism', authMechanism
        ]
        if not usedb:
            argv += ["--nodb"]
        if small_oplog or small_oplog_rs:
            argv += ["--eval", 'testingReplication = true;']
        if use_ssl:
            argv += [
                "--ssl", "--sslPEMKeyFile", "jstests/libs/client.pem",
                "--sslCAFile", "jstests/libs/ca.pem"
            ]
        argv += [path]
    elif ext in ["", ".exe"]:
        # Blech.
        if os.path.basename(path) in [
                "test", "test.exe", "perftest", "perftest.exe"
        ]:
            argv = [path]
            if os.path.basename(path) in ["test", "test.exe"]:
                is_test_binary = True
        # more blech
        elif os.path.basename(path) in ['mongos', 'mongos.exe']:
            argv = [path, "--test"]
        else:
            argv = [
                test_path and os.path.abspath(os.path.join(test_path, path))
                or path, "--port", mongod_port
            ]
    else:
        raise Bug("fell off in extension case: %s" % path)

    if keyFile:
        f = open(keyFile, 'r')
        keyFileData = re.sub(r'\s', '', f.read())  # Remove all whitespace
        f.close()
        os.chmod(keyFile, stat.S_IRUSR | stat.S_IWUSR)
    else:
        keyFileData = None

    mongo_test_filename = os.path.basename(path)
    if 'sharedclient' in path:
        mongo_test_filename += "-sharedclient"

    # sys.stdout.write() is more atomic than print, so using it prevents
    # lines being interrupted by, e.g., child processes
    if quiet and not is_test_binary:
        vlog = tests_log
        qlog = sys.stdout
        tlog = sys.stderr
    else:
        vlog = sys.stdout
        qlog = None
        tlog = None

    vlog.write(" *******************************************\n")
    vlog.write("         Test : %s ...\n" % mongo_test_filename)
    vlog.flush()

    # FIXME: we don't handle the case where the subprocess
    # hangs... that's bad.
    if (argv[0].endswith('mongo')
            or argv[0].endswith('mongo.exe')) and not '--eval' in argv:
        evalString = 'TestData = new Object();' + \
                     'TestData.testPath = "' + path + '";' + \
                     'TestData.testFile = "' + os.path.basename( path ) + '";' + \
                     'TestData.testName = "' + re.sub( ".js$", "", os.path.basename( path ) ) + '";' + \
                     'TestData.noJournal = ' + ternary( no_journal )  + ";" + \
                     'TestData.noJournalPrealloc = ' + ternary( no_preallocj )  + ";" + \
                     'TestData.auth = ' + ternary( auth ) + ";" + \
                     'TestData.keyFile = ' + ternary( keyFile , '"' + str(keyFile) + '"' , 'null' ) + ";" + \
                     'TestData.keyFileData = ' + ternary( keyFile , '"' + str(keyFileData) + '"' , 'null' ) + ";"
        if os.sys.platform == "win32":
            # double quotes in the evalString on windows; this
            # prevents the backslashes from being removed when
            # the shell (i.e. bash) evaluates this string. yuck.
            evalString = evalString.replace('\\', '\\\\')

        if auth and usedb:
            evalString += 'jsTest.authenticate(db.getMongo());'

        argv = argv + ['--eval', evalString]

    if argv[0].endswith('test') and no_preallocj:
        argv = argv + ['--nopreallocj']

    vlog.write("      Command : %s\n" % ' '.join(argv))
    vlog.write("         Date : %s\n" % datetime.now().ctime())
    vlog.flush()

    tempfile = SpooledTemporaryFile(max_size=16 * 1024 * 1024)

    try:
        os.environ['MONGO_TEST_FILENAME'] = mongo_test_filename
        t1 = time.time()
        r = call(
            buildlogger(argv),
            cwd=test_path,
            # the dbtests know how to format their own output nicely
            stdout=ternary(is_test_binary, vlog, tempfile))
        t2 = time.time()
        del os.environ['MONGO_TEST_FILENAME']

        vlog.write("                %fms\n" % ((t2 - t1) * 1000))
        vlog.flush()

        if not is_test_binary:
            tempfile.seek(0)
            for line in tempfile:
                vlog.write(line)
            vlog.flush()

            if quiet:
                if r == 0:
                    qlog.write('ok %d %s\n' %
                               (testnum, os.path.basename(path)))
                else:
                    qlog.write('not ok %d %s # exit %d\n' %
                               (testnum, os.path.basename(path), r))
                qlog.flush()
                if r != 0:
                    tempfile.seek(0)
                    for line in tempfile:
                        tlog.write(line)
                    tlog.flush()
        if r != 0:
            raise TestExitFailure(path, r)
    finally:
        tempfile.close()

    if start_mongod:
        try:
            c = Connection(host="127.0.0.1",
                           port=int(mongod_port),
                           ssl=use_ssl)
        except Exception as e:
            print("Exception from pymongo: %s" % e)
            raise TestServerFailure(path)
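Reduced to its core, the buffering idiom above is: hand a spooled file to the child process as stdout, then rewind and replay it. Note that subprocess needs a real file descriptor, so the fileno() call forces the spooled file onto disk:

import sys
from subprocess import call
from tempfile import SpooledTemporaryFile

buf = SpooledTemporaryFile(max_size=16 * 1024 * 1024)
rc = call(['echo', 'hello'], stdout=buf)   # fileno() triggers a rollover
buf.seek(0)
for line in buf:                           # binary mode: lines are bytes
    sys.stdout.write(line.decode())
buf.close()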
Ejemplo n.º 53
0
 def __init__(self, file_obj, max_size=50 * 1024 * 1024):
     self._file_obj = file_obj
     SpooledTemporaryFile.__init__(self, max_size)
Ejemplo n.º 54
0
    def flush(self):
        """Flush the data to the server."""

        SpooledTemporaryFile.flush(self)
        self.commit()
Ejemplo n.º 55
0
    def upload_file(self,
                    user,
                    stream,
                    expected_size,
                    filename,
                    force_coll_name=''):
        """Upload WARC archive.

        :param User user: user
        :param stream: file object
        :param int expected_size: expected WARC archive size
        :param str filename: WARC archive filename
        :param str force_coll_name: name of collection to upload into

        :returns: upload information
        :rtype: dict
        """
        temp_file = None
        logger.debug('Upload Begin')

        logger.debug('Expected Size: ' + str(expected_size))

        #is_anon = False

        size_rem = user.get_size_remaining()

        logger.debug('User Size Rem: ' + str(size_rem))

        if size_rem < expected_size:
            return {'error': 'out_of_space'}

        if force_coll_name and not user.has_collection(force_coll_name):
            #if is_anon:
            #    user.create_collection(force_coll, 'Temporary Collection')

            #else:
            #status = 'Collection {0} not found'.format(force_coll_name)
            return {'error': 'no_such_collection'}

        temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

        stream = CacheingLimitReader(stream, expected_size, temp_file)

        if filename.endswith('.har'):
            stream, expected_size = self.har2warc(filename, stream)
            temp_file.close()
            temp_file = stream

        infos = self.parse_uploaded(stream, expected_size)

        total_size = temp_file.tell()
        if total_size != expected_size:
            return {
                'error': 'incomplete_upload',
                'expected': expected_size,
                'actual': total_size
            }

        upload_id, upload_key = self._init_upload_status(user,
                                                         total_size,
                                                         1,
                                                         filename=filename)

        return self.handle_upload(temp_file, upload_id, upload_key, infos,
                                  filename, user, force_coll_name, total_size)
Ejemplo n.º 56
0
    def upload_file(self):
        stream = None
        temp_file = None
        logger.debug('Upload Begin')

        expected_size = int(request.headers['Content-Length'])

        logger.debug('Expected Size: ' + str(expected_size))

        if not expected_size:
            return {'error_message': 'No File Specified'}

        curr_user = self.manager.get_curr_user()

        if not curr_user:
            #user = self.manager.get_anon_user()
            #force_coll = 'temp'
            #is_anon = True

            return {
                'error_message':
                'Sorry, uploads only available for logged-in users'
            }

        user = curr_user
        force_coll = request.query.getunicode('force-coll', '')
        is_anon = False

        size_rem = self.manager.get_size_remaining(user)

        logger.debug('User Size Rem: ' + str(size_rem))

        if size_rem < expected_size:
            return {
                'error_message': 'Sorry, not enough space to upload this file'
            }

        if force_coll and not self.manager.has_collection(user, force_coll):
            if is_anon:
                self.manager.create_collection(user, force_coll,
                                               'Temporary Collection')

            else:
                status = 'Collection {0} not found'.format(force_coll)
                return {'error_message': status}

        temp_file = SpooledTemporaryFile(max_size=BLOCK_SIZE)

        filename = request.query.getunicode('filename')

        stream = request.environ['wsgi.input']
        stream = CacheingLimitReader(stream, expected_size, temp_file)

        if filename.endswith('.har'):
            stream, expected_size = self.har2warc(filename, stream)
            temp_file.close()
            temp_file = stream

        infos = self.parse_uploaded(stream, expected_size)

        total_size = temp_file.tell()
        if total_size != expected_size:
            return {
                'error_message':
                'size mismatch: expected {0}, got {1}'.format(
                    expected_size, total_size)
            }

        upload_id = self._get_upload_id()

        upload_key = self.upload_key.format(user=user, upid=upload_id)

        with redis_pipeline(self.manager.redis) as pi:
            pi.hset(upload_key, 'size', 0)
            pi.hset(upload_key, 'total_size', total_size * 2)
            pi.hset(upload_key, 'filename', filename)
            pi.hset(upload_key, 'total_files', 1)
            pi.hset(upload_key, 'files', 1)

        return self.handle_upload(temp_file, upload_id, upload_key, infos,
                                  filename, user, force_coll, total_size)
Ejemplo n.º 57
0
class OcrSource(panoply.DataSource):
    """
    One Click Retail data source.
    API Docs (require login) - https://api.oneclickretail.com/api-docs-login
    Implemented endpoints -
    GET /v5/clients/{client_uuid}/reports/export?format=csv
    """
    def __init__(self, source, options):
        super(OcrSource, self).__init__(source, options)

        if not source.get('destination'):
            source['destination'] = DESTINATION

        if not source.get('idpattern'):
            source['idpattern'] = IDPATTERN

        if not source.get('resources'):
            raise Exception('No resources selected')

        self.resource = None
        self.data = None
        self.resources = source.get('resources')
        self.api_key = source.get('apiKey')
        self.weeks = source.get('weeks', DEFAULT_WEEKS_BACK)
        self.clientUUID = source.get('clientUUID')
        self.processed = 0
        self.total = len(self.resources)

    def read(self, batch_size=None):
        try:
            if not self.resource:
                self.resource = self.resources.pop()
                self.processed += 1
        except IndexError:
            # IndexError will occur when attempting to pop an element
            # off an empty list. This indicates that we have already
            # processed all resources.
            return None

        # Send progress message
        progress_msg = 'Fetching data for %s' % self.resource.get('name')
        self.progress(self.processed, self.total, progress_msg)

        # If data remains from the last resource, use it.
        # Otherwise fetch data from the current resource.
        self.data = self.data or self._fetch_resource()

        batch = self._extract_batch(self.data, batch_size)

        # If no data was returned for this batch we might have reached
        # the end of the source - attempt to read more.
        if not batch:
            # This was the last batch for this resource
            self.resource = None
            # Attempt to fetch the next resource
            return self.read()

        return batch

    def _fetch_resource(self):
        """
        Assemble the api call, execute it and parse the
        csv response as a list of dicts. Returns a dict generator
        """

        qs = self._build_qs()  # Build the query string
        url = self._build_url(qs)  # Build the full url
        fp = self._api_call(url)  # Fetch the data as a file pointer

        # Parse the list of dicts in to a dict generator
        return csv.DictReader(fp)

    def _build_qs(self):
        params = {
            'format': 'csv',
            'meta': FETCH_META,
            'X-API-KEY': self.api_key,
            'weeks_back': self.weeks
        }

        if self.source.get('filterId'):
            params['filter_id'] = self.source['filterId']

        qs = urllib.urlencode(params)
        return qs

    def _build_url(self, qs):
        base = self.resource.get('value') % self.clientUUID
        url = '%(base)s/%(endpoint)s?%(qs)s' % {
            'base': BASE_URL,
            'endpoint': base,
            'qs': qs
        }
        return url

    def _api_call(self, url):
        """
        Returns a SpooledTemporaryFile - this is a file that is initially
        stored in memory but once its size exceeds max_size it will start
        writing to disk. It is used because there is no way of knowing how
        large a file the api will return.
        We are expecting a csv file.
        """
        self.log('Request URL', url)
        response = urllib2.urlopen(url)

        # the response MUST be a csv file
        content_type = response.info().get('content-type')
        if 'csv' not in content_type:
            raise Exception('ERROR - Non CSV response.')

        # Decode the returned data and replace any characters that generate
        # encoding errors with the default unicode replacement character.
        data = response.read().decode('utf-8', 'replace')

        self.tmp_file = SpooledTemporaryFile(max_size=MAX_SIZE)
        # Force writing the data encoded as utf-8. If we don't do this,
        # python will attempt to write the data as 'ascii' which does not
        # support special characters

        self.tmp_file.write(data.encode('utf-8'))

        # 'rewind' the file pointer in order to
        # read it back during `_extract_batch()`
        self.tmp_file.seek(0)

        return self.tmp_file

    def _extract_batch(self, data, batch_size):
        """
        Iterates over up to batch_size rows of data,
        returning a list of results
        """

        batch_size = batch_size or BATCH_SIZE

        batch = []
        try:
            for i in range(batch_size):
                batch.append(next(data))
        except StopIteration:
            pass

        return batch
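The StopIteration loop in _extract_batch can also be written with itertools.islice, which drains at most batch_size rows and naturally yields a short (or empty) list at the end of input:

from itertools import islice

def extract_batch(rows, batch_size):
    # rows is any iterator (here a csv.DictReader); an empty
    # result signals that the resource is exhausted
    return list(islice(rows, batch_size))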
Ejemplo n.º 58
0
def push():
    if not request.is_json:
        return status(400)
    data = request.json

    if not ('ref' in data and data['ref'].startswith('refs/tags/')):
        return status(200)

    base = data['compare_url'] + 'api/v1'
    headers = {
        'Accept': 'application/json',
        'Authorization': 'token ' + secret.token.gitea,
    }
    repo = data['repository']

    # get tag information
    uri = uritemplate.expand(base + '/repos/{owner}/{repo}/git/tags/{sha}',
                             owner=repo['owner']['username'],
                             repo=repo['name'],
                             sha=data['after'])
    r = requests.get(uri, headers=headers)
    if r.status_code != 200:
        return status(500, message=f'error fetching "{uri}"')

    tag = r.json()

    # create release
    uri = uritemplate.expand(base + '/repos/{owner}/{repo}/releases',
                             owner=repo['owner']['username'],
                             repo=repo['name'])
    payload = {
        'body': tag['message'],
        'draft': False,
        'name': kebab2normal(repo['name']) + ' ' + tag['tag'],
        'prerelease': False,
        'tag_name': tag['tag'],
        'target_commitish': repo['default_branch'],
    }
    r = requests.post(uri, headers=headers, json=payload)
    if r.status_code != 201:
        return status(500, message=f'error creating release at "{uri}"')

    release = r.json()

    # create release zip
    with SpooledTemporaryFile() as f:
        with TemporaryDirectory() as d:
            p = run(['git', 'clone', repo['clone_url'], d])
            if p.returncode != 0:
                return status(500, message='error cloning repository')

            cmd = ['sh', os.path.join(d, '.bin', 'release.sh'), tag['tag']]
            if not os.path.exists(cmd[1]):
                cmd = ['git', 'archive', '--format=zip', tag['tag']]

            # stream the archive into the spooled file rather than holding
            # it all in a PIPE buffer
            p = run(cmd, stdout=f, cwd=d)
            if p.returncode != 0:
                return status(500, message='error creating archive')

        f.seek(0)
        b = f.read()

    # upload release zip
    uri = uritemplate.expand(base + '/repos/{owner}/{repo}/releases/{id}/assets?name={name}',
                             owner=repo['owner']['username'],
                             repo=repo['name'],
                             id=release['id'],
                             name=repo['name'] + '.zip')
    payload = {
        'attachment': (repo['name'] + '.zip', b, 'application/zip'),
    }
    r = requests.post(uri, headers=headers, files=payload)
    if r.status_code != 201:
        return status(500, message='error uploading archive')

    return status(200, message='release created')
Ejemplo n.º 59
0
class IncrementalWriter:
    """A streaming file writer for point clouds.

    Using the IncrementalWriter with spooled temporary files, which are
    only flushed to disk if they go above the given size, allows for
    streaming points to disk even when the header is unknown in advance.
    This allows some nice tricks, including splitting a point cloud into
    multiple files in a single pass, without memory issues.
    """

    # pylint:disable=too-few-public-methods

    def __init__(self,
                 filename: str,
                 header: PlyHeader,
                 utm: UTM_Coord = None,
                 buffer=2**22) -> None:
        """
        Args:
            filename: final place to save the file on disk.
            source_fname: source file for the pointcloud; used to detect
                file format for metadata etc.
            buffer (int): The number of bytes to hold in RAM before flushing
                the temporary file to disk.  Default 1MB, which holds ~8300
                points - enough for most objects but still practical to hold
                thousands in memory.  Set a smaller buffer for large forests.
        """
        self.filename = filename
        self.temp_storage = SpooledTemporaryFile(max_size=buffer, mode='w+b')
        self.count = 0
        self.utm = utm
        logging.debug(
            'At initialisation, instance of IncrementalWriter.utm = {}'.
            format(self.utm))
        self.header = header
        # Always write in little-endian mode; only store type information
        self.binary = struct.Struct('<' + header.form_str[1:])

    def __call__(self, point) -> None:
        """Add a single point to this pointcloud, saving in binary format.

        Args:
            point (namedtuple): vertex attributes for the point, eg xyzrgba.
        """
        self.temp_storage.write(self.binary.pack(*point))
        self.count += 1

    def __del__(self):
        """Flush data to disk and clean up."""
        logging.debug('Flushing data to disk in IncrementalWriter.__del__()')
        to_ply_types = {v: k for k, v in PLY_TYPES.items()}
        properties = [
            'property {t} {n}'.format(t=t, n=n) for t, n in zip((
                to_ply_types[p]
                for p in self.header.form_str[1:]), self.header.names)
        ]
        head = [
            'ply', 'format binary_little_endian 1.0',
            'element vertex {}'.format(self.count), '\n'.join(properties),
            'end_header'
        ]
        if self.utm is not None:
            head.insert(
                -1, 'comment UTM x y zone north ' +
                '{0.x} {0.y} {0.zone} {0.north}'.format(self.utm))
        if not os.path.isdir(os.path.dirname(self.filename)):
            os.makedirs(os.path.dirname(self.filename))
        with open(self.filename, 'wb') as f:
            f.write(('\n'.join(head) + '\n').encode('ascii'))
            self.temp_storage.seek(0)
            chunk = self.temp_storage.read(8192)
            while chunk:
                f.write(chunk)
                chunk = self.temp_storage.read(8192)
        self.temp_storage.close()
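A hypothetical usage, assuming PlyHeader is a simple record whose form_str carries a struct format string (leading endianness character included) and whose names lists the matching attributes:

# '<fff' means three little-endian float32 values per point: x, y, z
header = PlyHeader(form_str='<fff', names=('x', 'y', 'z'))
write_point = IncrementalWriter('out/cloud.ply', header, buffer=2**20)
for pt in [(0.0, 0.0, 0.0), (1.5, 2.5, 3.5)]:
    write_point(pt)
del write_point   # __del__ writes the header and flushes the points to disk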
Ejemplo n.º 60
0
    def run_command(self, command, stdin=None, env=None):
        """
        Launch a shell command line.

        :param command: Command line to launch
        :type command: str
        :param stdin: Standard input of command
        :type stdin: file
        :param env: Environment variable used in command
        :type env: dict
        :return: Standard output of command
        :rtype: file
        """
        cmd = shlex.split(command)
        stdout = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        stderr = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        full_env = self.env.copy()
        full_env.update(env or {})
        try:
            if not getattr(stdin, 'fileno', None):
                if stdin:
                    process = Popen(cmd,
                                    stdin=PIPE,
                                    stdout=stdout,
                                    stderr=stderr,
                                    env=full_env)
                    grep_stdout = process.communicate(
                        input=stdin.file.getvalue())[0]
                else:
                    process = Popen(cmd,
                                    stdin=stdin,
                                    stdout=stdout,
                                    stderr=stderr,
                                    env=full_env)
            else:
                process = Popen(cmd,
                                stdin=stdin,
                                stdout=stdout,
                                stderr=stderr,
                                env=full_env)
            process.wait()
            if process.poll():
                stderr.seek(0)
                raise exceptions.CommandConnectorError(
                    "Error running: {}\n{}".format(command, stderr.read()))
            stdout.seek(0)
            stderr.seek(0)
            return stdout, stderr
        except OSError as err:
            raise exceptions.CommandConnectorError(
                "Error running: {}\n{}".format(command, str(err)))