Example #1
def unquote_folder_paths(state, schema):
    try:
        NodeSettings = state.get_model('addons_googledrive', 'nodesettings')
        targets = NodeSettings.objects.filter(folder_path__isnull=False)
    except LookupError:
        return
    for obj in targets:
        try:
            # Python 2: percent-decode the path, then decode the UTF-8 bytes to unicode.
            obj.folder_path = unquote(obj.folder_path).decode('utf-8')
        except UnicodeEncodeError:
            # Already unicode; .decode() first tried an implicit ASCII encode and failed.
            obj.folder_path = unquote(obj.folder_path)
    bulk_update(targets, update_fields=['folder_path'])
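For context, the try/except above is a Python 2 idiom; on Python 3, urllib.parse.unquote percent-decodes and UTF-8-decodes in one step. A minimal sketch with a made-up folder path:

from urllib.parse import unquote  # Python 3

# '%E2%82%AC' is the percent-encoded UTF-8 for the euro sign.
print(unquote('/Drive/%E2%82%AC'))  # -> '/Drive/€'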
Example #2
    def from_bytes(cls, bytes):
        """
        Parse a URL from some bytes.

        """

        try:  # this belongs on the first thing likely to cause a (Type)Error
            scheme, _, rest = bytes.strip().partition(b":")
        except Exception:
            exception = InvalidURL("{!r} is not a valid URL".format(bytes))
            raise_with_traceback(exception)

        if scheme and not rest.startswith(b"//"):
            raise InvalidURL(
                "{!r} is not a valid URL without initial '//'".format(bytes),
            )

        authority, slash, rest = rest[2:].partition(b"/")
        userinfo, _, host_and_port = authority.rpartition(b"@")
        username, _, password = userinfo.partition(b":")

        if host_and_port.startswith(b"["):  # IPv6 Host
            host, delimiter, port_str = host_and_port.partition(b"]:")
            host += b"]" if delimiter else b""
        else:
            host, _, port_str = host_and_port.partition(b":")

        if not port_str:
            port = None
        else:
            try:
                port = int(unquote(port_str))
            except ValueError:
                raise InvalidURL("{!r} is not a valid port".format(port_str))

        path, _, rest = rest.partition(b"?")
        query, _, fragment = rest.partition(b"#")

        return cls.normalized(
            scheme=scheme,
            username=username,
            password=password,
            host=host,
            port=port,
            path=unquote(slash + path),
            query=parse_qs(query, keep_blank_values=True),
            fragment=unquote_plus(fragment),
            unnormalized=bytes,
            unnormalized_authority=authority,
            unnormalized_userinfo=userinfo,
        )
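For reference, a minimal standalone sketch of the same partition-based splitting on a sample URL (InvalidURL and cls.normalized belong to the surrounding class and are not reproduced here):

raw = b"https://user:secret@example.com:8443/a%20b?q=1#frag"
scheme, _, rest = raw.strip().partition(b":")
authority, slash, rest = rest[2:].partition(b"/")
userinfo, _, host_and_port = authority.rpartition(b"@")
host, _, port_str = host_and_port.partition(b":")
print(scheme, host, port_str)  # b'https' b'example.com' b'8443'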
Example #3
    def from_bytes(cls, bytes):
        """
        Parse a URL from some bytes.

        """

        try:  # this belongs on the first thing likely to cause a (Type)Error
            scheme, _, rest = bytes.strip().partition(b":")
        except Exception:
            exception = InvalidURL("{!r} is not a valid URL".format(bytes))
            raise_with_traceback(exception)

        if scheme and not rest.startswith(b"//"):
            raise InvalidURL(
                "{!r} is not a valid URL without initial '//'".format(bytes), )

        authority, slash, rest = rest[2:].partition(b"/")
        userinfo, _, host_and_port = authority.rpartition(b"@")
        username, _, password = userinfo.partition(b":")

        if host_and_port.startswith(b"["):  # IPv6 Host
            host, delimiter, port_str = host_and_port.partition(b"]:")
            host += b"]" if delimiter else b""
        else:
            host, _, port_str = host_and_port.partition(b":")

        if not port_str:
            port = None
        else:
            try:
                port = int(unquote(port_str))
            except ValueError:
                raise InvalidURL("{!r} is not a valid port".format(port_str))

        path, _, rest = rest.partition(b"?")
        query, _, fragment = rest.partition(b"#")

        return cls.normalized(
            scheme=scheme,
            username=username,
            password=password,
            host=host,
            port=port,
            path=unquote(slash + path),
            query=parse_qs(query, keep_blank_values=True),
            fragment=unquote_plus(fragment),
            unnormalized=bytes,
            unnormalized_authority=authority,
            unnormalized_userinfo=userinfo,
        )
Example #4
def caidao_decode(data, *args, **kwargs):
    p = PrintCollector()
    data_dict = query_str_2_dict(data.strip())
    d = {}
    for k, v in data_dict.items():
        v = unquote(v)
        try:
            x = force_bytes(v)
            missing_padding = len(v) % 4
            if missing_padding != 0:
                x += b'=' * (4 - missing_padding)

            d[k] = force_text(base64.decodebytes(x))
        except Exception as e:
            print(e)
            d[k] = v

    z0_raw = ''
    if 'z0' in d:
        z0_raw = d['z0']
        d['z0'] = ';\n'.join(d['z0'].split(';'))

    for k, v in d.items():
        value = '{}:\n{}\n'.format(k, v)
        p.print(value)
        if k == 'z0':
            if value != 'z0:\n{}\n'.format(z0_raw):
                p.print('z0_raw:\n{}\n'.format(z0_raw))

    return p.smart_output()
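The padding repair inside the loop is the standard fix for base64 payloads whose trailing '=' characters were stripped in transit; in isolation:

import base64

s = b"aGVsbG8"                 # 'hello' base64-encoded with its padding stripped
missing_padding = len(s) % 4
if missing_padding:
    s += b'=' * (4 - missing_padding)
print(base64.decodebytes(s))   # -> b'hello'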
Example #5
    def save_file(self, url_parsed, res):
        try:
            md5 = hashlib.md5(res.body).hexdigest()
            netloc = url_parsed.netloc
            site_dir_name = netloc.replace('.', '_').replace(':', '_')
            if url_parsed.path == '':
                path = site_dir_name + '/'
            else:
                path = site_dir_name + url_parsed.path
            path = unquote(path)
            if path.endswith('/'):
                path = path + 'index.html'
            dir_name = os.path.dirname(path)

            if md5 in self.file_md5_dict and self.file_md5_dict[md5] == path:
                return

            logger.warning('saved: {}'.format(path))
            # A folder can't share its name with a file in the same parent
            # directory (the file couldn't be saved), so rename on collision.
            if os.path.exists(path) and os.path.isdir(path):
                path = path + '_' + str(uuid.uuid4())[-3:]
            elif os.path.exists(dir_name) and os.path.isfile(dir_name):
                dir_name = dir_name + '_' + str(uuid.uuid4())[-3:]
                path = dir_name + '/' + path.split('/')[-1]

            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            with open(path, 'wb') as f:
                f.write(res.body)

            self.file_md5_dict[md5] = path
        except Exception as e:
            logger.error(e)
Example #6
    def endpointForURI(self, uri):
        if uri.scheme in (b"http", b"https", b"ws", b"wss"):
            defaultport = 443 if uri.scheme in (b"https", b"wss") else 80
            host, port = BaseUrl.parsenetloc(uri.netloc, defaultport)
            endpoint = t_endpoints.HostnameEndpoint(self.reactor, host, port)

            if defaultport == 443:
                ssl_supported = hasattr(t_endpoints, "TLSWrapperClientEndpoint")

                try:
                    from twisted.internet.ssl import optionsForClientTLS
                except ImportError:
                    ssl_supported = False

                if not ssl_supported:
                    raise t_error.SchemeNotSupported(
                        "{} not supported (OpenSSL is not available)".format(uri.scheme.decode("utf_8"))
                    )

                options = optionsForClientTLS(host.decode("utf_8"))
                endpoint = t_endpoints.TLSWrapperClientEndpoint(options, endpoint)

            return endpoint

        if uri.scheme == b"unix":
            path = url_parse.unquote(uri.netloc.decode("ascii"))
            uri.netloc = b"localhost"

            return t_endpoints.UNIXClientEndpoint(self.reactor, path)

        raise t_error.SchemeNotSupported("{} not supported (unrecognized)".format(uri.scheme.decode("utf_8")))
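The unix branch works because the socket path travels percent-encoded in the URL's netloc; a stdlib-only illustration (the socket path is just a sample):

from urllib.parse import unquote, urlparse

uri = urlparse("unix://%2Fvar%2Frun%2Fdocker.sock/")
print(unquote(uri.netloc))  # -> '/var/run/docker.sock'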
Example #7
    def delete(self, session):
        """ Delete seen entries """
        args = seen_delete_parser.parse_args()
        value = args['value']
        is_seen_local = args['is_seen_local']

        if value:
            value = unquote(value)
            value = '%' + value + '%'
        seen_entries_list = seen.search(value=value,
                                        status=is_seen_local,
                                        session=session)

        if not seen_entries_list.all():
            return {'status': 'error', 'message': 'no results to delete'}, 404

        for entry in seen_entries_list:
            try:
                seen.forget_by_id(entry.id)
            except ValueError:
                return {
                    'status': 'error',
                    'message': 'Could not delete entry ID {0}'.format(entry.id)
                }, 500
        return {}
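The value handling above percent-decodes the user-supplied term and wraps it in SQL LIKE wildcards; standalone:

from urllib.parse import unquote

value = 'breaking%20bad'
value = '%' + unquote(value) + '%'
print(value)  # -> '%breaking bad%', a substring pattern for a LIKE query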
Example #8
    def matches(self, entry, regexp, find_from=None, not_regexps=None):
        """
        Check if :entry: has any string fields or strings in a list field that match :regexp:

        :param entry: Entry instance
        :param regexp: Compiled regexp
        :param find_from: None or a list of fields to search from
        :param not_regexps: None or list of regexps that can NOT match
        :return: Field matching
        """
        unquote_fields = ['url']
        for field in find_from or ['title', 'description']:
            # Only evaluate lazy fields if find_from has been explicitly specified
            if not entry.get(field, eval_lazy=find_from):
                continue
            # Make all fields into lists for search purposes
            values = entry[field]
            if not isinstance(values, list):
                values = [values]
            for value in values:
                if not isinstance(value, basestring):
                    continue
                if field in unquote_fields:
                    value = unquote(value)
                if regexp.search(value):
                    # Make sure the not_regexps do not match for this field
                    for not_regexp in not_regexps or []:
                        if self.matches(entry, not_regexp, find_from=[field]):
                            entry.trace('Configured not_regexp %s matched, ignored' % not_regexp)
                            break
                    else:  # None of the not_regexps matched
                        return field
Example #9
    def on_task_download(self, task, config):
        config = self.prepare_config(config, task)
        for entry in task.accepted:
            ftp_url = urlparse(entry.get('url'))
            ftp_url = ftp_url._replace(path=unquote(ftp_url.path))
            current_path = os.path.dirname(ftp_url.path)
            try:
                ftp = self.ftp_connect(config, ftp_url, current_path)
            except ftplib.all_errors as e:
                entry.fail("Unable to connect to server : %s" % (e))
                break

            if not os.path.isdir(config['ftp_tmp_path']):
                log.debug('creating base path: %s' % config['ftp_tmp_path'])
                os.mkdir(config['ftp_tmp_path'])

            file_name = os.path.basename(ftp_url.path)

            try:
                # Directory
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd(file_name)
                self.ftp_walk(ftp,
                              os.path.join(config['ftp_tmp_path'], file_name),
                              config, ftp_url, ftp_url.path)
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd('..')
                if config['delete_origin']:
                    ftp.rmd(file_name)
            except ftplib.error_perm:
                # File
                self.ftp_down(ftp, file_name, config['ftp_tmp_path'], config,
                              ftp_url, current_path)

            ftp.close()
Example #10
    def on_task_download(self, task, config):
        config = self.prepare_config(config, task)
        for entry in task.accepted:
            ftp_url = urlparse(entry.get('url'))
            ftp_url = ftp_url._replace(path=unquote(ftp_url.path))
            current_path = os.path.dirname(ftp_url.path)
            try:
                ftp = self.ftp_connect(config, ftp_url, current_path)
            except ftplib.all_errors as e:
                entry.fail("Unable to connect to server : %s" % (e))
                break

            if not os.path.isdir(config['ftp_tmp_path']):
                log.debug('creating base path: %s' % config['ftp_tmp_path'])
                os.mkdir(config['ftp_tmp_path'])

            file_name = os.path.basename(ftp_url.path)

            try:
                # Directory
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd(file_name)
                self.ftp_walk(ftp, os.path.join(config['ftp_tmp_path'], file_name), config, ftp_url, ftp_url.path)
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd('..')
                if config['delete_origin']:
                    ftp.rmd(file_name)
            except ftplib.error_perm:
                # File
                self.ftp_down(ftp, file_name, config['ftp_tmp_path'], config, ftp_url, current_path)

            ftp.close()
Example #11
  def BuildToken(request, execution_time):
    """Build an ACLToken from the request."""

    # The request.args dictionary will also be filled on HEAD calls.
    if request.method in ["GET", "HEAD"]:
      reason = request.args.get("reason", "")
    elif request.method in ["POST", "DELETE", "PATCH"]:
      # The header X-GRR-Reason is set in api-service.js.
      reason = utils.SmartUnicode(
          urlparse.unquote(request.headers.get("X-Grr-Reason", "")))

    # We assume that request.user contains the username that we can trust.
    # No matter what authentication method is used, the WebAuthManager is
    # responsible for authenticating the user and setting request.user to
    # a correct value (see gui/webauth.py).
    #
    # The token that's built here will be later used to find an API router,
    # get the ApiCallHandler from the router, and then to call the handler's
    # Handle() method. API router will be responsible for all the ACL checks.
    token = access_control.ACLToken(
        username=request.user,
        reason=reason,
        process="GRRAdminUI",
        expiry=rdfvalue.RDFDatetime.Now() + execution_time)

    for field in ["Remote_Addr", "X-Forwarded-For"]:
      remote_addr = request.headers.get(field, "")
      if remote_addr:
        token.source_ips.append(remote_addr)
    return token
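The X-Grr-Reason header arrives percent-encoded, which is why it is unquoted before being stored in the token; for example:

from urllib.parse import unquote

print(unquote('code%20review%20for%20%231234'))  # -> 'code review for #1234'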
Example #13
    def matches(self, entry, regexp, find_from=None, not_regexps=None):
        """
        Check if :entry: has any string fields or strings in a list field that match :regexp:

        :param entry: Entry instance
        :param regexp: Compiled regexp
        :param find_from: None or a list of fields to search from
        :param not_regexps: None or list of regexps that can NOT match
        :return: Field matching
        """
        unquote_fields = ['url']
        for field in find_from or ['title', 'description']:
            # Only evaluate lazy fields if find_from has been explicitly specified
            if not entry.get(field, eval_lazy=find_from):
                continue
            # Make all fields into lists for search purposes
            values = entry[field]
            if not isinstance(values, list):
                values = [values]
            for value in values:
                if not isinstance(value, basestring):
                    value = str(value)
                if field in unquote_fields:
                    value = unquote(value)
                if regexp.search(value):
                    # Make sure the not_regexps do not match for this field
                    for not_regexp in not_regexps or []:
                        if self.matches(entry, not_regexp, find_from=[field]):
                            entry.trace('Configured not_regexp %s matched, ignored' % not_regexp)
                            break
                    else:  # None of the not_regexps matched
                        return field
Example #14
    def get(self, session):
        """ Search for seen entries """
        args = seen_search_parser.parse_args()
        value = args["value"]
        page = args["page"]
        page_size = args["page_size"]
        is_seen_local = args["is_seen_local"]
        sort_by = args["sort_by"]
        order = args["order"]

        # Handle max size limit
        if page_size > 100:
            page_size = 100

        # Handle the default if it is explicitly passed
        if order == "desc":
            order = True
        else:
            order = False

        # Unquotes and prepares value for DB lookup
        if value:
            value = unquote(value)
            value = "%{0}%".format(value)

        start = page_size * (page - 1)
        stop = start + page_size

        kwargs = {
            "value": value,
            "status": is_seen_local,
            "stop": stop,
            "start": start,
            "order_by": sort_by,
            "descending": order,
            "session": session,
        }
        count = seen.search(count=True, **kwargs)

        raw_seen_entries_list = seen.search(**kwargs)
        converted_seen_entry_list = [entry.to_dict() for entry in raw_seen_entries_list.all()]

        pages = int(ceil(count / float(page_size)))

        actual_size = min(count, page_size)

        # Invalid page request
        if page > pages and pages != 0:
            return {"status": "error", "message": "page %s does not exist" % page}, 404

        return jsonify(
            {
                "seen_entries": converted_seen_entry_list,
                "total_number_of_seen_entries": count,
                "page_size": actual_size,
                "page_number": page,
                "total_number_of_pages": pages,
            }
        )
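The pagination arithmetic these handlers share, shown standalone with sample numbers:

from math import ceil

count, page, page_size = 57, 3, 25
start = page_size * (page - 1)                # 50
stop = start + page_size                      # 75
pages = int(ceil(count / float(page_size)))   # 3
actual_size = min(count, page_size)           # 25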
Example #15
    def get(self, session):
        """ Search for seen entries """
        args = seen_search_parser.parse_args()
        value = args['value']
        page = args['page']
        page_size = args['page_size']
        is_seen_local = args['is_seen_local']
        sort_by = args['sort_by']
        order = args['order']

        # Handle max size limit
        if page_size > 100:
            page_size = 100

        # Handle the default if it is explicitly passed
        descending = bool(order == 'desc')

        # Unquotes and prepares value for DB lookup
        if value:
            value = unquote(value)
            value = '%{0}%'.format(value)

        start = page_size * (page - 1)
        stop = start + page_size

        kwargs = {
            'value': value,
            'status': is_seen_local,
            'stop': stop,
            'start': start,
            'order_by': sort_by,
            'descending': descending,
            'session': session
        }
        count = seen.search(count=True, **kwargs)

        raw_seen_entries_list = seen.search(**kwargs)
        converted_seen_entry_list = [
            entry.to_dict() for entry in raw_seen_entries_list.all()
        ]

        pages = int(ceil(count / float(page_size)))

        actual_size = min(count, page_size)

        # Invalid page request
        if page > pages and pages != 0:
            return {
                'status': 'error',
                'message': 'page %s does not exist' % page
            }, 404

        return jsonify({
            'seen_entries': converted_seen_entry_list,
            'total_number_of_seen_entries': count,
            'page_size': actual_size,
            'page_number': page,
            'total_number_of_pages': pages
        })
Example #16
    def folder_name(self):
        if not self.folder_id:
            return None

        if self.folder_id != DEFAULT_ROOT_ID:
            return unquote(os.path.split(self.folder_path)[1])
        else:
            return '/ (Full OneDrive)'
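The folder name is just the last path component, percent-decoded; e.g.:

import os
from urllib.parse import unquote

print(unquote(os.path.split('/Documents/My%20Folder')[1]))  # -> 'My Folder'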
Example #17
    def get(self, session):
        """ Search for seen entries """
        args = seen_search_parser.parse_args()
        value = args['value']
        page = args['page']
        page_size = args['page_size']
        is_seen_local = args['is_seen_local']
        sort_by = args['sort_by']
        order = args['order']

        # Handle max size limit
        if page_size > 100:
            page_size = 100

        # Handle the default if it is explicitly passed
        if order == 'desc':
            order = True
        else:
            order = False

        # Unquotes and prepares value for DB lookup
        if value:
            value = unquote(value)
            value = '%{0}%'.format(value)

        start = page_size * (page - 1)
        stop = start + page_size

        kwargs = {
            'value': value,
            'status': is_seen_local,
            'stop': stop,
            'start': start,
            'order_by': sort_by,
            'descending': order,
            'session': session
        }
        count = seen.search(count=True, **kwargs)

        raw_seen_entries_list = seen.search(**kwargs)
        converted_seen_entry_list = [entry.to_dict() for entry in raw_seen_entries_list.all()]

        pages = int(ceil(count / float(page_size)))

        actual_size = min(count, page_size)

        # Invalid page request
        if page > pages and pages != 0:
            return {'status': 'error',
                    'message': 'page %s does not exist' % page}, 404

        return jsonify({
            'seen_entries': converted_seen_entry_list,
            'total_number_of_seen_entries': count,
            'page_size': actual_size,
            'page_number': page,
            'total_number_of_pages': pages
        })
Example #18
    def download_entry(self, entry, config, sftp):
        """
        Downloads the file(s) described in entry
        """

        path = unquote(urlparse(entry['url']).path) or '.'
        delete_origin = config['delete_origin']
        recursive = config['recursive']

        to = config['to']
        if to:
            try:
                to = render_from_entry(to, entry)
            except RenderError as e:
                log.error('Could not render path: %s' % to)
                entry.fail(e)
                return

        if not sftp.lexists(path):
            log.error('Remote path does not exist: %s' % path)
            return

        if sftp.isfile(path):
            source_file = remotepath.basename(path)
            source_dir = remotepath.dirname(path)
            try:
                sftp.cwd(source_dir)
                self.download_file(source_file, to, sftp, delete_origin)
            except Exception as e:
                error = 'Failed to download file %s (%s)' % (path, e)
                log.error(error)
                entry.fail(error)
        elif sftp.isdir(path):
            base_path = remotepath.normpath(remotepath.join(path, '..'))
            dir_name = remotepath.basename(path)
            handle_file = partial(self.download_file,
                                  dest=to,
                                  sftp=sftp,
                                  delete_origin=delete_origin)

            try:
                sftp.cwd(base_path)
                sftp.walktree(dir_name, handle_file, self.handle_dir,
                              self.handle_unknown, recursive)
            except Exception as e:
                error = 'Failed to download directory %s (%s)' % (path, e)
                log.error(error)
                entry.fail(error)

                return

            if delete_origin:
                self.remove_dir(sftp, path)
        else:
            log.warning('Skipping unknown file %s' % path)
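The first line of the method extracts the remote path from the entry's sftp URL, falling back to '.' for a bare host; for instance:

from urllib.parse import unquote, urlparse

path = unquote(urlparse('sftp://host/some%20dir/file.txt').path) or '.'
print(path)  # -> '/some dir/file.txt'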
Example #19
    def download_entry(self, entry, config, sftp):
        """
        Downloads the file(s) described in entry
        """

        path = unquote(urlparse(entry['url']).path) or '.'
        delete_origin = config['delete_origin']
        recursive = config['recursive']

        to = config['to']
        if to:
            try:
                to = render_from_entry(to, entry)
            except RenderError as e:
                log.error('Could not render path: %s' % to)
                entry.fail(e)
                return

        if not sftp.lexists(path):
            log.error('Remote path does not exist: %s' % path)
            return

        if sftp.isfile(path):
            source_file = remotepath.basename(path)
            source_dir = remotepath.dirname(path)
            try:
                sftp.cwd(source_dir)
                self.download_file(source_file, to, sftp, delete_origin)
            except Exception as e:
                error = 'Failed to download file %s (%s)' % (path, e)
                log.error(error)
                entry.fail(error)
        elif sftp.isdir(path):
            base_path = remotepath.normpath(remotepath.join(path, '..'))
            dir_name = remotepath.basename(path)
            handle_file = partial(
                self.download_file, dest=to, sftp=sftp, delete_origin=delete_origin
            )

            try:
                sftp.cwd(base_path)
                sftp.walktree(
                    dir_name, handle_file, self.handle_dir, self.handle_unknown, recursive
                )
            except Exception as e:
                error = 'Failed to download directory %s (%s)' % (path, e)
                log.error(error)
                entry.fail(error)

                return

            if delete_origin:
                self.remove_dir(sftp, path)
        else:
            log.warning('Skipping unknown file %s' % path)
Example #20
    def from_bytes(cls, bytes):
        """
        Parse a URL from some bytes.

        """

        scheme, _, rest = bytes.strip().partition(b":")

        if scheme and not rest.startswith(b"//"):
            raise InvalidURL(
                "{!r} is not a valid URL without initial '//'".format(bytes),
            )

        authority, slash, rest = rest[2:].partition(b"/")
        userinfo, _, host_and_port = authority.rpartition(b"@")
        username, _, password = userinfo.partition(b":")
        host, _, port_str = host_and_port.partition(b":")

        if not port_str:
            port = None
        else:
            try:
                port = int(unquote(port_str))
            except ValueError:
                raise InvalidURL("{!r} is not a valid port".format(port_str))

        path, _, rest = rest.partition(b"?")
        query, _, fragment = rest.partition(b"#")

        return cls.normalized(
            scheme=scheme,
            username=username,
            password=password,
            host=host,
            port=port,
            path=unquote(slash + path),
            query=parse_qs(query, keep_blank_values=True),
            fragment=unquote_plus(fragment),
            unnormalized=bytes,
            authority=authority,
            userinfo=userinfo,
        )
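Note that unlike Examples #2 and #3, this variant splits host and port with a plain partition(b":"), so a bracketed IPv6 authority would be cut at the wrong colon. The bracket handling the other variants add, in isolation:

host_and_port = b"[::1]:8080"
host, delimiter, port_str = host_and_port.partition(b"]:")
host += b"]" if delimiter else b""
print(host, port_str)  # -> b'[::1]' b'8080'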
Example #21
    def folder_name(self):
        if not self.folder_id:
            return None

        if self.folder_id != DEFAULT_ROOT_ID:
            # `urllib` does not properly handle unicode.
            # encode input to `str`, decode output back to `unicode`
            return unquote(os.path.split(
                self.folder_path)[1].encode('utf-8')).decode('utf-8')
        else:
            return '/ (Full OneDrive)'
Example #22
    def versions(self):
        versions = {}
        for version in self.data.get('derivativeInfo'):
            (version, width, height, size, mimetype,
             u1, u2, u3, url, filename) = version.split(':')
            versions[version] = {
                'width': width,
                'height': height,
                'size': size,
                'mimetype': mimetype,
                'url': unquote(url),
                'filename': filename,
            }
        return versions
Example #23
    def versions(self):
        versions = {}
        for version in self.data.get('derivativeInfo'):
            (version, width, height, size, mimetype, u1, u2, u3, url,
             filename) = version.split(':')
            versions[version] = {
                'width': width,
                'height': height,
                'size': size,
                'mimetype': mimetype,
                'url': unquote(url),
                'filename': filename,
            }
        return versions
Example #24
    def on_task_download(self, task, config):
        config = self.prepare_config(config, task)
        for entry in task.accepted:
            ftp_url = urlparse(entry.get('url'))
            ftp_url = ftp_url._replace(path=unquote(ftp_url.path))
            current_path = os.path.dirname(ftp_url.path)
            try:
                ftp = self.ftp_connect(config, ftp_url, current_path)
            except ftplib.all_errors as e:
                entry.fail("Unable to connect to server : %s" % (e))
                break

            to_path = config['ftp_tmp_path']

            try:
                to_path = entry.render(to_path)
            except RenderError as err:
                raise plugin.PluginError(
                    "Path value replacement `%s` failed: %s" %
                    (to_path, err.args[0]))

            # Clean invalid characters with pathscrub plugin
            to_path = pathscrub(to_path)

            if not os.path.exists(to_path):
                log.debug("Creating base path: %s" % to_path)
                os.makedirs(to_path)
            if not os.path.isdir(to_path):
                raise plugin.PluginWarning(
                    "Destination `%s` is not a directory." % to_path)

            file_name = os.path.basename(ftp_url.path)

            try:
                # Directory
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd(file_name)
                self.ftp_walk(ftp, os.path.join(to_path, file_name), config,
                              ftp_url, ftp_url.path)
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd('..')
                if config['delete_origin']:
                    ftp.rmd(file_name)
            except ftplib.error_perm:
                # File
                self.ftp_down(ftp, file_name, to_path, config, ftp_url,
                              current_path)

            ftp.close()
Example #25
def parse_link_rel(url, fn):
    """
    Read through html file ``fn`` downloaded from ``url``, looking for a
    link tag of the form:

    <link rel="alternate"
          type="application/sage"
          title="currently ignored"
          href=".../example.sws" />

    This function reads ``fn`` looking for such tags and returns a list
    of dictionaries of the form

    {'title': from title field in link, 'url': absolute URL to .sws file}

    for the corresponding ``.sws`` files. Naturally if there are no
    appropriate link tags found, the returned list is empty.
    """
    class GetLinkRelWorksheets(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.worksheets = []

        def handle_starttag(self, tag, attrs):
            if (tag == 'link' and ('rel', 'alternate') in attrs
                    and ('type', 'application/sage') in attrs):
                self.worksheets.append({
                    'title': [_ for _ in attrs if _[0] == 'title'][0][1],
                    'url': [_ for _ in attrs if _[0] == 'href'][0][1]
                })

    parser = GetLinkRelWorksheets()
    with open(fn) as f:
        parser.feed(f.read())

    ret = []
    for d in parser.worksheets:
        sws = d['url']
        # is that link a relative URL?
        if not urlparse(sws).netloc:
            # unquote-then-quote to avoid turning %20 into %2520, etc
            ret.append({
                'url': urljoin(url, quote(unquote(sws))),
                'title': d['title']
            })
        else:
            ret.append({'url': sws, 'title': d['title']})
    return ret
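The unquote-then-quote trick mentioned in the comment keeps already-encoded URLs stable instead of encoding them twice:

from urllib.parse import quote, unquote

sws = 'my%20worksheet.sws'
print(quote(unquote(sws)))  # -> 'my%20worksheet.sws' (unchanged)
print(quote(sws))           # -> 'my%2520worksheet.sws' (%20 double-encoded)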
Example #26
    def delete(self, session):
        """ Delete seen entries """
        args = seen_base_parser.parse_args()
        value = args['value']
        local = args['local']

        if value:
            value = unquote(value)
            value = '%' + value + '%'
        seen_entries_list = db.search(value=value, status=local, session=session)

        deleted = 0
        for se in seen_entries_list:
            db.forget_by_id(se.id, session=session)
            deleted += 1
        return success_response('successfully deleted %i entries' % deleted)
Example #27
    def delete(self, session):
        """ Delete seen entries """
        args = seen_base_parser.parse_args()
        value = args['value']
        local = args['local']

        if value:
            value = unquote(value)
            value = '%' + value + '%'
        seen_entries_list = db.search(value=value,
                                      status=local,
                                      session=session)

        deleted = 0
        for se in seen_entries_list:
            db.forget_by_id(se.id, session=session)
            deleted += 1
        return success_response('successfully deleted %i entries' % deleted)
Example #28
    def on_task_download(self, task, config):
        config = self.prepare_config(config, task)
        for entry in task.accepted:
            ftp_url = urlparse(entry.get('url'))
            ftp_url = ftp_url._replace(path=unquote(ftp_url.path))
            current_path = os.path.dirname(ftp_url.path)
            try:
                ftp = self.ftp_connect(config, ftp_url, current_path)
            except ftplib.all_errors as e:
                entry.fail("Unable to connect to server : %s" % (e))
                break

            to_path = config['ftp_tmp_path']

            try:
                to_path = entry.render(to_path)
            except RenderError as err:
                raise plugin.PluginError("Path value replacement `%s` failed: %s" % (to_path, err.args[0]))

            # Clean invalid characters with pathscrub plugin
            to_path = pathscrub(to_path)

            if not os.path.exists(to_path):
                log.debug("Creating base path: %s" % to_path)
                os.makedirs(to_path)
            if not os.path.isdir(to_path):
                raise plugin.PluginWarning("Destination `%s` is not a directory." % to_path)

            file_name = os.path.basename(ftp_url.path)

            try:
                # Directory
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd(file_name)
                self.ftp_walk(ftp, os.path.join(to_path, file_name), config, ftp_url, ftp_url.path)
                ftp = self.check_connection(ftp, config, ftp_url, current_path)
                ftp.cwd('..')
                if config['delete_origin']:
                    ftp.rmd(file_name)
            except ftplib.error_perm:
                # File
                self.ftp_down(ftp, file_name, to_path, config, ftp_url, current_path)

            ftp.close()
Example #29
def lint():
    """Run linter on the provided text and return the results."""
    if 'text' in request.values:
        text = unquote(request.values['text'])
        print(text)
        try:
            job = q.enqueue(worker_function, text)
            print(job)

            return jsonify(job_id=job.id), 202
        except Exception as e:
            print(e)
            return jsonify(status="error", message=str(e)), 500

    elif 'job_id' in request.values:
        job = q.fetch_job(request.values['job_id'])

        if not job:
            return jsonify(status="error",
                           message="No job with requested job_id."), 404

        elif job.result is None:
            return jsonify(status="error",
                           message="Job is not yet ready."), 202

        else:
            errors = []
            for i, e in enumerate(job.result):
                app.logger.debug(e)
                errors.append({
                    "check": e[0],
                    "message": e[1],
                    "line": e[2],
                    "column": e[3],
                    "start": e[4],
                    "end": e[5],
                    "extent": e[5] - e[4],
                    "severity": e[7],
                    "replacements": e[8],
                    "source_name": "",
                    "source_url": "",
                })
            return jsonify(status="success", data={"errors": errors})
Example #30
    def delete(self, session):
        """ Delete seen entries """
        args = seen_delete_parser.parse_args()
        value = args["value"]
        is_seen_local = args["is_seen_local"]

        if value:
            value = unquote(value)
            value = "%" + value + "%"
        seen_entries_list = seen.search(value=value, status=is_seen_local, session=session)

        if not seen_entries_list.all():
            return {"status": "error", "message": "no results to delete"}, 404

        for entry in seen_entries_list:
            try:
                seen.forget_by_id(entry.id)
            except ValueError:
                return {"status": "error", "message": "Could not delete entry ID {0}".format(entry.id)}, 500
        return {}
Example #31
def lint():
    """Run linter on the provided text and return the results."""
    if 'text' in request.values:
        text = unquote(request.values['text'])
        job = q.enqueue(worker_function, text)

        return jsonify(job_id=job.id), 202

    elif 'job_id' in request.values:
        job = q.fetch_job(request.values['job_id'])

        if not job:
            return jsonify(
                status="error",
                message="No job with requested job_id."), 404

        elif job.result is None:
            return jsonify(
                status="error",
                message="Job is not yet ready."), 202

        else:
            errors = []
            for i, e in enumerate(job.result):
                app.logger.debug(e)
                errors.append({
                    "check": e[0],
                    "message": e[1],
                    "line": e[2],
                    "column": e[3],
                    "start": e[4],
                    "end": e[5],
                    "extent": e[5] - e[4],
                    "severity": e[7],
                    "replacements": e[8],
                    "source_name": "",
                    "source_url": "",
                })
            return jsonify(
                status="success",
                data={"errors": errors})
Example #32
    def delete(self, session):
        """ Delete seen entries """
        args = seen_delete_parser.parse_args()
        value = args['value']
        is_seen_local = args['is_seen_local']

        if value:
            value = unquote(value)
            value = '%' + value + '%'
        seen_entries_list = seen.search(value=value, status=is_seen_local, session=session)

        if not seen_entries_list.all():
            return {'status': 'error',
                    'message': 'no results to delete'}, 404

        for entry in seen_entries_list:
            try:
                seen.forget_by_id(entry.id)
            except ValueError:
                return {'status': 'error',
                        'message': 'Could not delete entry ID {0}'.format(entry.id)}, 500
        return {}
Example #33
    def get_loginform(self, redirect_uri=''):
        from plexpy.webserve import serve_template
        return serve_template(templatename="login.html", title="Login", redirect_uri=unquote(redirect_uri))
Example #34
def unprocess_payload(data):
    return process_data(
        data, lambda value: unquote(value.encode('utf-8') if value else ''))
Example #35
    def download_entry(self, task, entry, url, tmp_path):
        """Downloads `entry` by using `url`.

        :raises: Several types of exceptions ...
        :raises: PluginWarning
        """

        log.debug('Downloading url \'%s\'', url)

        # get content
        auth = None
        if 'download_auth' in entry:
            auth = entry['download_auth']
            log.debug('Custom auth enabled for %s download: %s', entry['title'], entry['download_auth'])

        try:
            response = task.requests.get(url, auth=auth, raise_status=False)
        except UnicodeError:
            log.error('Unicode error while encoding url %s', url)
            return
        if response.status_code != 200:
            log.debug('Got %s response from server. Saving error page.', response.status_code)
            # Save the error page
            if response.content:
                self.save_error_page(entry, task, response.content)
            # Raise the error
            response.raise_for_status()
            return

        # expand ~ in temp path
        # TODO jinja?
        try:
            tmp_path = os.path.expanduser(tmp_path)
        except RenderError as e:
            entry.fail('Could not set temp path. Error during string replacement: %s' % e)
            return

        # Clean illegal characters from temp path name
        tmp_path = pathscrub(tmp_path)

        # create if missing
        if not os.path.isdir(tmp_path):
            log.debug('creating tmp_path %s' % tmp_path)
            os.mkdir(tmp_path)

        # check for write-access
        if not os.access(tmp_path, os.W_OK):
            raise plugin.PluginError('Not allowed to write to temp directory `%s`' % tmp_path)

        # download and write data into a temp file
        tmp_dir = tempfile.mkdtemp(dir=tmp_path)
        fname = hashlib.md5(url.encode('utf-8', 'replace')).hexdigest()
        datafile = os.path.join(tmp_dir, fname)
        outfile = io.open(datafile, 'wb')
        try:
            for chunk in response.iter_content(chunk_size=150 * 1024, decode_unicode=False):
                outfile.write(chunk)
        except Exception as e:
            # don't leave futile files behind
            # outfile has to be closed before we can delete it on Windows
            outfile.close()
            log.debug('Download interrupted, removing datafile')
            os.remove(datafile)
            if isinstance(e, socket.timeout):
                log.error('Timeout while downloading file')
            else:
                raise
        else:
            outfile.close()
            # Do a sanity check on downloaded file
            if os.path.getsize(datafile) == 0:
                entry.fail('File %s is 0 bytes in size' % datafile)
                os.remove(datafile)
                return
            # store temp filename into entry so other plugins may read and modify content
            # temp file is moved into final destination at self.output
            entry['file'] = datafile
            log.debug('%s field file set to: %s', entry['title'], entry['file'])

        if 'content-type' in response.headers:
            entry['mime-type'] = str(parse_header(response.headers['content-type'])[0])
        else:
            entry['mime-type'] = "unknown/unknown"

        content_encoding = response.headers.get('content-encoding', '')
        decompress = 'gzip' in content_encoding or 'deflate' in content_encoding
        if 'content-length' in response.headers and not decompress:
            entry['content-length'] = int(response.headers['content-length'])

        # prefer content-disposition naming, note: content-disposition can be disabled completely
        # by setting entry field `content-disposition` to False
        if entry.get('content-disposition', True):
            self.filename_from_headers(entry, response)
        else:
            log.info('Content-disposition disabled for %s', entry['title'])
        self.filename_ext_from_mime(entry)

        if not entry.get('filename'):
            filename = unquote(url.rsplit('/', 1)[1])
            log.debug('No filename - setting from url: %s', filename)
            entry['filename'] = filename
        log.debug('Finishing download_entry() with filename %s', entry.get('filename'))
Example #36
def scrape_image_urls(keywords, number=None, face_only=False, safe_mode=False, proxy=None, proxy_type="http"):
    print("\nScraping From Google Image Search ...\n")
    print("Keywords:\t" + keywords)
    base_url = "https://www.google.com/search?tbm=isch"
    keywords_str = "&q=" + "+".join(keywords.split())

    query_url = base_url + keywords_str

    if number is None:
        print("Number:\t\tNo limit")
    else:
        print("Number:\t\t" + str(number))

    if face_only is True:
        query_url += "&tbs=itp:face"
        print("Face Only:\tYes")
    else:
        print("Face Only:\tNo")

    if safe_mode is True:
        query_url += "&safe=on"
        print("Safe Mode:\tOn")
    else:
        query_url += "&safe=off"
        print("Safe Mode:\tOff")

    print("Query URL:\t" + query_url)

    phantomjs_args = list()

    if proxy is not None:
        phantomjs_args = [
            "--proxy=" + proxy,
            "--proxy-type=" + proxy_type,
            ]
    driver = webdriver.PhantomJS(executable_path="/opt/phantomjs-2.1.1/bin/phantomjs",
                                 service_args=phantomjs_args, desired_capabilities=dcap)
    driver.set_window_size(10000, 7500)
    driver.get(query_url)

    last_image_count = 0
    retry_times = 0

    time.sleep(3)

    while True:
        img_count = len(driver.find_elements_by_class_name("rg_l"))
        if img_count > last_image_count:
            # New results loaded: remember the count and keep waiting.
            last_image_count = img_count
            retry_times = 0
        else:
            # No growth: give up after a few unchanged polls.
            if retry_times > 5:
                break
            retry_times += 1
        time.sleep(0.5)

    image_elements = driver.find_elements_by_class_name("rg_l")

    image_urls = list()

    url_pattern = r"imgurl=\S*&amp;imgrefurl"

    for image_element in image_elements:
        outer_html = image_element.get_attribute("outerHTML")
        re_group = re.search(url_pattern, outer_html)
        if re_group is not None:
            image_url = unquote(re_group.group()[7:-14])
            image_urls.append(image_url)

    if number is not None and number > len(image_urls):
        number = len(image_urls)

    print("\nTotal {0} images scraped, {1} will be used.\n".format(len(image_urls), number))

    return image_urls[0:number]
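The regex slice pulls the percent-encoded image URL out of each anchor's outerHTML; a self-contained version of that extraction with made-up markup:

import re
from urllib.parse import unquote

outer_html = '<a href="/imgres?imgurl=https%3A%2F%2Fexample.com%2Fcat.jpg&amp;imgrefurl=...">'
re_group = re.search(r"imgurl=\S*&amp;imgrefurl", outer_html)
if re_group is not None:
    # drop the 7-char 'imgurl=' prefix and the 14-char '&amp;imgrefurl' suffix
    print(unquote(re_group.group()[7:-14]))  # -> 'https://example.com/cat.jpg'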
Example #37
def resolve_guid(guid, suffix=None):
    """Load GUID by primary key, look up the corresponding view function in the
    routing table, and return the return value of the view function without
    changing the URL.

    :param str guid: GUID primary key
    :param str suffix: Remainder of URL after the GUID
    :return: Return value of proxied view function
    """
    try:
        # Look up
        guid_object = Guid.load(guid)
    except KeyError as e:
        if e.message == 'osfstorageguidfile':  # Used when an old detached OsfStorageGuidFile object is accessed
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)
        else:
            raise e
    if guid_object:
        # verify that the object implements a GuidStoredObject-like interface. If a model
        #   was once GuidStoredObject-like but that relationship has changed, it's
        #   possible to have referents that are instances of classes that don't
        #   have a deep_url attribute or otherwise don't behave as
        #   expected.
        if not hasattr(guid_object.referent, 'deep_url'):
            sentry.log_message('Guid resolved to an object with no deep_url',
                               dict(guid=guid))
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)
        referent = guid_object.referent
        if referent is None:
            logger.error('Referent of GUID {0} not found'.format(guid))
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)
        if not referent.deep_url:
            raise HTTPError(http_status.HTTP_404_NOT_FOUND)

        # Handle file `/download` shortcut with supported types.
        if suffix and suffix.rstrip('/').lower() == 'download':
            file_referent = None
            if isinstance(referent, Preprint) and referent.primary_file:
                file_referent = referent.primary_file
            elif isinstance(referent, BaseFileNode) and referent.is_file:
                file_referent = referent

            if file_referent:
                if isinstance(
                        file_referent.target,
                        Preprint) and not file_referent.target.is_published:
                    # TODO: Ideally, permissions wouldn't be checked here.
                    # This is necessary to prevent a logical inconsistency with
                    # the routing scheme - if a preprint is not published, only
                    # admins and moderators should be able to know it exists.
                    auth = Auth.from_kwargs(request.args.to_dict(), {})
                    # Require a logged-in user with moderator (view_submissions) or admin permissions
                    if auth.user is None or not (
                            auth.user.has_perm('view_submissions',
                                               file_referent.target.provider)
                            or file_referent.target.has_permission(
                                auth.user, permissions.ADMIN)):
                        raise HTTPError(http_status.HTTP_404_NOT_FOUND)

                # Extend `request.args` adding `action=download`.
                request.args = request.args.copy()
                request.args.update({'action': 'download'})
                # Do not include the `download` suffix in the url rebuild.
                url = _build_guid_url(unquote(file_referent.deep_url))
                return proxy_url(url)

        # Handle Ember Applications
        if isinstance(referent, Preprint):
            if referent.provider.domain_redirect_enabled:
                # This route should always be intercepted by nginx for the branded domain,
                # w/ the exception of `<guid>/download` handled above.
                return redirect(referent.absolute_url,
                                http_status.HTTP_301_MOVED_PERMANENTLY)

            if PROXY_EMBER_APPS:
                resp = requests.get(EXTERNAL_EMBER_APPS['preprints']['server'],
                                    stream=True,
                                    timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()),
                                resp.status_code)

            return send_from_directory(preprints_dir, 'index.html')

        if isinstance(referent,
                      BaseFileNode) and referent.is_file and (getattr(
                          referent.target, 'is_quickfiles', False)):
            if referent.is_deleted:
                raise HTTPError(http_status.HTTP_410_GONE)
            if PROXY_EMBER_APPS:
                resp = requests.get(
                    EXTERNAL_EMBER_APPS['ember_osf_web']['server'],
                    stream=True,
                    timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                return Response(stream_with_context(resp.iter_content()),
                                resp.status_code)

            return send_from_directory(ember_osf_web_dir, 'index.html')

        if isinstance(
                referent,
                Registration) and (not suffix or suffix.rstrip('/').lower()
                                   in ('comments', 'links', 'components')):
            if flag_is_active(request, features.EMBER_REGISTRIES_DETAIL_PAGE):
                # Route only the base detail view to ember
                if PROXY_EMBER_APPS:
                    resp = requests.get(
                        EXTERNAL_EMBER_APPS['ember_osf_web']['server'],
                        stream=True,
                        timeout=EXTERNAL_EMBER_SERVER_TIMEOUT)
                    return Response(stream_with_context(resp.iter_content()),
                                    resp.status_code)

                return send_from_directory(ember_osf_web_dir, 'index.html')

        url = _build_guid_url(unquote(referent.deep_url), suffix)
        return proxy_url(url)

    # GUID not found; try lower-cased and redirect if exists
    guid_object_lower = Guid.load(guid.lower())
    if guid_object_lower:
        return redirect(_build_guid_url(guid.lower(), suffix))

    # GUID not found
    raise HTTPError(http_status.HTTP_404_NOT_FOUND)
Example #38
    def _initParameters(self):
        self._owner = self.request['userName']

        # SPL-107168 Need to populate namespace & owner beforehand which are needed
        # to generate context specific URI while initializing conf defaults

        # get namespace/owner
        self._namespace = self.args.get(self.ARG_INPUT_NAMESPACE)
        if self.ARG_INPUT_OWNER in self.args:
            self._owner = self.args.get(self.ARG_INPUT_OWNER)

        self._initArgs()
        self._initWebDefaults()
        self._initLimitsDefaults()
        self._initAlertActionsDefaults()

        # initialize view type
        # the order matters, check dashboard xml first
        if self.ARG_INPUT_DASHBOARD_XML in self.args:
            self._dashboardXml = unquote(
                self.args.get(self.ARG_INPUT_DASHBOARD_XML))
            self._viewType = self.VIEW_TYPE_DASHBOARD
            self._dashboardName = self.args.get(self.ARG_INPUT_DASHBOARD)
            logger.debug("pdfgen/render xml=%s" % self._dashboardXml)
        elif self.ARG_INPUT_DASHBOARD in self.args:
            self._dashboardName = self.args.get(self.ARG_INPUT_DASHBOARD)
            self._viewType = self.VIEW_TYPE_DASHBOARD
        elif self.ARG_INPUT_REPORT in self.args:
            self._reportName = self.args.get(self.ARG_INPUT_REPORT)
            self._viewType = self.VIEW_TYPE_REPORT
        elif self.ARG_INPUT_SEARCH in self.args:
            self._searchStr = self.args.get(self.ARG_INPUT_SEARCH,
                                            "No search query specified")
            self._et = self.args.get(self.ARG_INPUT_ET, 0)
            self._lt = self.args.get(self.ARG_INPUT_LT, '')

            # if et or lt is 0.000 change it to 0
            if float(self._et) == 0.0:
                logger.debug("_et was %s, updating it to '0'" % self._et)
                self._et = '0'

            if self._lt and float(self._lt) == 0.0:
                logger.debug("_lt was %s, updating it to '0'" % self._lt)
                self._lt = '0'

            self._reportName = 'Splunk search results'
            self._viewType = self.VIEW_TYPE_SEARCH

        # initialize papersize
        if self.ARG_INPUT_PAPERSIZE in self.args:
            paperSizeArg = self.args.get(self.ARG_INPUT_PAPERSIZE).lower()
            if paperSizeArg in pdfrenderer.PAPERSIZES:
                self._paperSize = paperSizeArg
            else:
                logger.warn('Invalid paper size "%s"' % paperSizeArg)
                raise ArgError(
                    "Paper size is not valid. Please check the pdfgen.log file for more information."
                )
        logger.debug("pdf-init paper-size=%s" % self._paperSize)

        # initialize include-splunk-logo
        self._includeSplunkLogo = normalizeBoolean(
            self.args.get(self.ARG_INPUT_INCLUDE_SPLUNK_LOGO,
                          self._includeSplunkLogo))
        logger.debug("pdf-init include-splunk-logo=%s" %
                     self._includeSplunkLogo)

        # initialize max-row-per-table
        if self.ARG_INPUT_MAX_ROWS_PER_TABLE in self.args:
            maxRowsPerTableArg = self.args.get(
                self.ARG_INPUT_MAX_ROWS_PER_TABLE)
            try:
                self._maxRowsPerTable = int(maxRowsPerTableArg)
            except (ValueError, TypeError):
                logger.warn(
                    'Max-rows-per-table="%s" is invalid, must be an integer' %
                    maxRowsPerTableArg)
                raise ArgError(
                    "max-rows-per-table is invalid, must be an integer. Please check the pdfgen.log file for more information."
                )
        logger.debug("pdf-init max-rows-per-table=%s" %
                     (str(self._maxRowsPerTable)))

        # initialize timeout
        if self.ARG_INPUT_TIMEOUT in self.args:
            self._timeoutDuration = int(self.args.get(self.ARG_INPUT_TIMEOUT))
        logger.debug("pdf-init timeoutDuration=%s" % self._timeoutDuration)
        self._startTimeoutClock()

        if self.ARG_INPUT_REPORT_FILE_NAME in self.args:
            self._fileNamePattern = self.args.get(
                self.ARG_INPUT_REPORT_FILE_NAME)
            logger.debug("pdf-init report-file-name=%s" %
                         self._fileNamePattern)

        # initialize time of report
        self._initTimeOfReport()

        # check for SIDs
        if self._viewType == self.VIEW_TYPE_REPORT:
            if self.ARG_INPUT_SID in self.args:
                self._inputSids[0] = self.args.get(self.ARG_INPUT_SID)
        else:
            for argK, argV in self.args.items():
                if self.ARG_INPUT_SID in argK:
                    # we want the panel sequence number which is retrieved from "sid_<seqNum>"
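                    # e.g. an arg key of "sid_2" (hypothetical) yields seqNum "2",
                    # so self._inputSids[2] holds that panel's search id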
                    match = self.sidRE.match(argK)
                    if match is not None and len(match.groups(0)) > 0:
                        seqNum = match.groups(0)[0]
                        if len(seqNum) > 0:
                            self._inputSids[int(seqNum)] = argV
                            logger.debug("sid seqNum=%s value=%s" %
                                         (seqNum, argV))

        # allow override from http arguments
        for validArgs in pdfrenderer.ALL_PDF_SETTINGS:
            v = self.args.get(validArgs) or self._pdfSettings.get(validArgs)
            if v is not None:
                v = v.strip()
                # SPL-98329 convert value into lowercase except logo path
                if validArgs != pdfrenderer.SETTING_LOGO_PATH:
                    v = v.lower()
                if validArgs in pdfrenderer.PDF_BOOLEAN_SETTINGS:
                    self._pdfSettings[validArgs] = normalizeBoolean(v)
                else:
                    self._pdfSettings[validArgs] = v

        logger.debug("pdfSettings=%s" % (str(self._pdfSettings)))

        self._validateParameters()

        self._timestampStr = splunk.search.searchUtils.getFormattedTimeForUser(
            self.sessionKey, now=self._now, timeFormat='%F %T %Z')

        self._locale = self.args.get(self.ARG_INPUT_LOCALE)
        logger.info("pdf-init locale=%s" % self._locale)

        self._server_zoneinfo = self.args.get(self.ARG_INPUT_TIMEZONE)
        if self._server_zoneinfo is None:
            self._server_zoneinfo = toDefaultStrings(
                rest.simpleRequest('/services/search/timeparser/tz',
                                   sessionKey=self.sessionKey)[1])
        logger.info("pdf-init server_zoneinfo=%s" % self._server_zoneinfo)
Example #39
def validate_uri(digest_uri, request_path):
    digest_url_components = urlparse(digest_uri)
    return unquote(digest_url_components[2]) == request_path
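
# A minimal usage sketch (paths are hypothetical), relying only on the
# behavior above: the path component of the Digest "uri" directive is
# percent-decoded and compared against the request path.
#
#     validate_uri('/dir/index.html?x=1', '/dir/index.html')   # True
#     validate_uri('/dir/%69ndex.html', '/dir/index.html')     # True
#     validate_uri('/other/page', '/dir/index.html')           # False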
Example #40
    def get_synced_items(self, machine_id=None, client_id_filter=None, user_id_filter=None,
                         rating_key_filter=None, sync_id_filter=None):

        if not machine_id:
            machine_id = plexpy.CONFIG.PMS_IDENTIFIER

        if isinstance(rating_key_filter, list):
            rating_key_filter = [str(k) for k in rating_key_filter]
        elif rating_key_filter:
            rating_key_filter = [str(rating_key_filter)]

        if isinstance(user_id_filter, list):
            user_id_filter = [str(k) for k in user_id_filter]
        elif user_id_filter:
            user_id_filter = [str(user_id_filter)]

        sync_list = self.get_plextv_sync_lists(machine_id, output_format='xml')
        user_data = users.Users()

        synced_items = []

        try:
            xml_head = sync_list.getElementsByTagName('SyncList')
        except Exception as e:
            logger.warn("Tautulli PlexTV :: Unable to parse XML for get_synced_items: %s." % e)
            return {}

        for a in xml_head:
            client_id = helpers.get_xml_attr(a, 'clientIdentifier')

            # Filter by client_id
            if client_id_filter and str(client_id_filter) != client_id:
                continue

            sync_list_id = helpers.get_xml_attr(a, 'id')
            sync_device = a.getElementsByTagName('Device')

            for device in sync_device:
                device_user_id = helpers.get_xml_attr(device, 'userID')
                try:
                    device_username = user_data.get_details(user_id=device_user_id)['username']
                    device_friendly_name = user_data.get_details(user_id=device_user_id)['friendly_name']
                except Exception:
                    device_username = ''
                    device_friendly_name = ''
                device_name = helpers.get_xml_attr(device, 'name')
                device_product = helpers.get_xml_attr(device, 'product')
                device_product_version = helpers.get_xml_attr(device, 'productVersion')
                device_platform = helpers.get_xml_attr(device, 'platform')
                device_platform_version = helpers.get_xml_attr(device, 'platformVersion')
                device_type = helpers.get_xml_attr(device, 'device')
                device_model = helpers.get_xml_attr(device, 'model')
                device_last_seen = helpers.get_xml_attr(device, 'lastSeenAt')

            # Filter by user_id
            if user_id_filter and device_user_id not in user_id_filter:
                continue

            for synced in a.getElementsByTagName('SyncItems'):
                sync_item = synced.getElementsByTagName('SyncItem')
                for item in sync_item:

                    sync_media_type = None
                    rating_key = None
                    for location in item.getElementsByTagName('Location'):
                        location_uri = unquote(helpers.get_xml_attr(location, 'uri'))

                        if location_uri.startswith('library://'):
                            if 'collection' in location_uri:
                                sync_media_type = 'collection'
                            clean_uri = location_uri.split('/')
                            rating_key = next((j for i, j in zip(clean_uri[:-1], clean_uri[1:])
                                              if i in ('metadata', 'collections')), None)
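                            # e.g. a decoded uri like 'library://<uuid>/item//library/metadata/12345'
                            # (hypothetical) splits so that '12345' follows 'metadata';
                            # that trailing segment becomes the rating key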

                        elif location_uri.startswith('playlist://'):
                            sync_media_type = 'playlist'
                            tokens = users.Users().get_tokens(user_id=device_user_id)
                            if tokens['server_token']:
                                plex = Plex(token=tokens['server_token'])
                                for playlist in plex.PlexServer.playlists():
                                    if location_uri.endswith(playlist.guid):
                                        rating_key = str(playlist.ratingKey)  # String for backwards consistency

                    # Filter by rating_key
                    if rating_key_filter and rating_key not in rating_key_filter:
                        continue

                    sync_id = helpers.get_xml_attr(item, 'id')

                    # Filter by sync_id
                    if sync_id_filter and str(sync_id_filter) != sync_id:
                        continue

                    sync_version = helpers.get_xml_attr(item, 'version')
                    sync_root_title = helpers.get_xml_attr(item, 'rootTitle')
                    sync_title = helpers.get_xml_attr(item, 'title')
                    sync_metadata_type = helpers.get_xml_attr(item, 'metadataType')
                    sync_content_type = helpers.get_xml_attr(item, 'contentType')

                    for status in item.getElementsByTagName('Status'):
                        status_failure_code = helpers.get_xml_attr(status, 'failureCode')
                        status_failure = helpers.get_xml_attr(status, 'failure')
                        status_state = helpers.get_xml_attr(status, 'state')
                        status_item_count = helpers.get_xml_attr(status, 'itemsCount')
                        status_item_complete_count = helpers.get_xml_attr(status, 'itemsCompleteCount')
                        status_item_downloaded_count = helpers.get_xml_attr(status, 'itemsDownloadedCount')
                        status_item_ready_count = helpers.get_xml_attr(status, 'itemsReadyCount')
                        status_item_successful_count = helpers.get_xml_attr(status, 'itemsSuccessfulCount')
                        status_total_size = helpers.get_xml_attr(status, 'totalSize')
                        status_item_download_percent_complete = helpers.get_percent(
                            status_item_downloaded_count, status_item_count)

                    for settings in item.getElementsByTagName('MediaSettings'):
                        settings_video_bitrate = helpers.get_xml_attr(settings, 'maxVideoBitrate')
                        settings_video_quality = helpers.get_xml_attr(settings, 'videoQuality')
                        settings_video_resolution = helpers.get_xml_attr(settings, 'videoResolution')
                        settings_audio_boost = helpers.get_xml_attr(settings, 'audioBoost')
                        settings_audio_bitrate = helpers.get_xml_attr(settings, 'musicBitrate')
                        settings_photo_quality = helpers.get_xml_attr(settings, 'photoQuality')
                        settings_photo_resolution = helpers.get_xml_attr(settings, 'photoResolution')

                    sync_details = {"device_name": device_name,
                                    "platform": device_platform,
                                    "user_id": device_user_id,
                                    "user": device_friendly_name,
                                    "username": device_username,
                                    "root_title": sync_root_title,
                                    "sync_title": sync_title,
                                    "metadata_type": sync_metadata_type,
                                    "content_type": sync_content_type,
                                    "rating_key": rating_key,
                                    "state": status_state,
                                    "item_count": status_item_count,
                                    "item_complete_count": status_item_complete_count,
                                    "item_downloaded_count": status_item_downloaded_count,
                                    "item_downloaded_percent_complete": status_item_download_percent_complete,
                                    "video_bitrate": settings_video_bitrate,
                                    "audio_bitrate": settings_audio_bitrate,
                                    "photo_quality": settings_photo_quality,
                                    "video_quality": settings_video_quality,
                                    "total_size": status_total_size,
                                    "failure": status_failure,
                                    "client_id": client_id,
                                    "sync_id": sync_id,
                                    "sync_media_type": sync_media_type
                                    }

                    synced_items.append(sync_details)

        return session.filter_session_info(synced_items, filter_key='user_id')
Example #41
    def get(self, session):
        """ Search for seen entries """
        args = seen_search_parser.parse_args()

        # Filter params
        value = args['value']
        local = args['local']

        # Pagination and sorting params
        page = args['page']
        per_page = args['per_page']
        sort_by = args['sort_by']
        sort_order = args['order']

        # Handle max size limit
        if per_page > 100:
            per_page = 100

        descending = sort_order == 'desc'

        # Unquotes and prepares value for DB lookup
        if value:
            value = unquote(value)
            value = '%{0}%'.format(value)

        start = per_page * (page - 1)
        stop = start + per_page
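        # e.g. page=3, per_page=50 -> rows 100..149 (start=100, stop=150)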

        kwargs = {
            'value': value,
            'status': local,
            'stop': stop,
            'start': start,
            'order_by': sort_by,
            'descending': descending,
            'session': session,
        }

        total_items = db.search(count=True, **kwargs)

        if not total_items:
            return jsonify([])

        raw_seen_entries_list = db.search(**kwargs).all()

        converted_seen_entry_list = [
            entry.to_dict() for entry in raw_seen_entries_list
        ]

        # Total number of pages
        total_pages = int(ceil(total_items / float(per_page)))
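        # e.g. total_items=101, per_page=50 -> int(ceil(2.02)) = 3 pages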

        # Actual results in page
        actual_size = min(len(converted_seen_entry_list), per_page)

        # Invalid page request
        if page > total_pages and total_pages != 0:
            raise NotFoundError('page %s does not exist' % page)

        # Get pagination headers
        pagination = pagination_headers(total_pages, total_items, actual_size,
                                        request)

        # Create response
        rsp = jsonify(converted_seen_entry_list)

        # Add link header to response
        rsp.headers.extend(pagination)
        return rsp
Example #43
    def handle_POST(self):
        """
        Install a remote application in response to an HTTP POST.
        """
        self.verifyAllowRemote()
        parts = len(self.pathParts)
        if parts == self.BASE_DEPTH + 2:
            default_version = True
        elif parts == self.BASE_DEPTH + 3:
            default_version = False
        else:
            raise splunk.BadRequest
        if HTTP_AUTH_TOKEN not in self.args:
            raise splunk.BadRequest("Missing argument: %s" % HTTP_AUTH_TOKEN)
        if HTTP_ACTION not in self.args:
            raise splunk.BadRequest("Missing argument: %s" % HTTP_ACTION)
        if self.args[HTTP_ACTION] not in (HTTP_ACTION_INSTALL,
                                          HTTP_ACTION_DOWNLOAD):
            raise splunk.BadRequest("Invalid value '%s' for argument '%s'" %
                                    (self.args[HTTP_ACTION], HTTP_ACTION))
        # check if this is a cloud stack
        if isCloud(self.sessionKey):
            app_name = self.pathParts[self.BASE_DEPTH + 1]
            # Get all cloud apps and see if the app being installed is vetted
            # for cloud, i.e. install_method == simple
            # TODO: Change to just querying for the app in question when BASE-4074
            # is finished.
            getargs = {'offset': 0, 'limit': 100}
            vetted_apps = []
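            # Page through the vetted-apps feed 100 entries at a time until an
            # empty result set comes back.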
            while True:
                serverResponse, serverContent = splunk.rest.simpleRequest(
                    VETTED_APPS_URI, self.sessionKey, getargs)
                if serverResponse.status != 200:
                    raise splunk.BadRequest(
                        'Error while querying Splunkbase. Splunkd returned %s'
                        % serverContent)
                vetted_app_data = json.loads(serverContent)
                if not vetted_app_data['results']:
                    break
                else:
                    getargs['offset'] += 100
                    vetted_apps.extend(vetted_app_data['results'])
            for app in vetted_apps:
                if app['appid'] == app_name and app[
                        'install_method'] == VETTED_APP_INSTALL_METHOD:
                    break
            else:
                raise splunk.BadRequest(
                    'App %s is not vetted for Splunk Cloud.' % app_name)

        url = self._native_to_foreign_url()
        root = self._get_feed_root(url)
        if default_version:
            root = self._get_latest_version(root)
        href = self._parse_link(root)

        try:
            # Package up a Request with auth information.
            req = Request(href)
            # XXX: Converting the auth token from a POST arg to a header
            # requires us to unquote() it. If the client did not correctly
            # quote() the token, login will fail.
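            # e.g. quote('Splunk 1234+/==') -> 'Splunk%201234%2B/%3D%3D'
            # (hypothetical token); the unquote() below restores the original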
            req.add_header(HTTP_AUTH_HEADER,
                           unquote(self.args[HTTP_AUTH_TOKEN]))
            # Install using this Request object.
            installer = bundle_paths.BundleInstaller()
            if self.args[HTTP_ACTION] == HTTP_ACTION_INSTALL:
                b, status = installer.install_from_url(req,
                                                       sslpol=self._sslpol)
                self.response.setStatus(status)
                if status in (bundle_paths.BundleInstaller.STATUS_INSTALLED,
                              bundle_paths.BundleInstaller.STATUS_UPGRADED):
                    # Migrate old-style bundles.
                    logger.debug("Configuring application contents")
                    try:
                        b.migrate()
                    except Exception as e:
                        logger.exception(e)
                        self.addMessage("WARN",
                                        "Error during configuration: %s" % e)
                    # Redirect to local application.
                    self.response.setHeader("Location",
                                            self._redirect_to_local(b))
                    # Let splunkd know about newly-installed app.
                    logger.debug(
                        "Notifying splunkd that app has been installed")
                    splunk.rest.simpleRequest('apps/local/_reload',
                                              sessionKey=self.sessionKey)
                if status == bundle_paths.BundleInstaller.STATUS_INSTALLED:
                    self.addMessage("INFO",
                                    "Installed application: %s" % b.name())
                elif status == bundle_paths.BundleInstaller.STATUS_UPGRADED:
                    self.addMessage("INFO",
                                    "Upgraded application: %s" % b.name())
                else:
                    self.addMessage(
                        "WARN", "Could not install application: %s" % b.name())
            else:
                assert self.args[HTTP_ACTION] == HTTP_ACTION_DOWNLOAD
                downloaded = installer.download_from_url(req,
                                                         sslpol=self._sslpol)
                self.addMessage("INFO",
                                "Downloaded application file: %s" % downloaded)
                self.response.setHeader('content-type', 'application/json')
                response_json = {"downloaded": downloaded}
                self.response.write(json.dumps(response_json))

        except splunk.ResourceNotFound:
            raise
        except splunk.AuthorizationFailed:
            raise
        except splunk.InternalServerError:
            raise
        except Exception as e:
            logger.exception(e)
            raise splunk.InternalServerError(e)
Example #44
    def download_entry(self, task, entry, url, tmp_path):
        """Downloads `entry` by using `url`.

        :raises: Several types of exceptions ...
        :raises: PluginWarning
        """

        log.debug('Downloading url \'%s\'', url)

        # get content
        auth = None
        if 'download_auth' in entry:
            auth = entry['download_auth']
            log.debug('Custom auth enabled for %s download: %s',
                      entry['title'], entry['download_auth'])

        try:
            response = task.requests.get(url, auth=auth, raise_status=False)
        except UnicodeError:
            log.error('Unicode error while encoding url %s', url)
            return
        if response.status_code != 200:
            log.debug('Got %s response from server. Saving error page.',
                      response.status_code)
            # Save the error page
            if response.content:
                self.save_error_page(entry, task, response.content)
            # Raise the error
            response.raise_for_status()
            return

        # expand ~ in temp path
        # TODO jinja?
        try:
            tmp_path = os.path.expanduser(tmp_path)
        except RenderError as e:
            entry.fail(
                'Could not set temp path. Error during string replacement: %s'
                % e)
            return

        # Clean illegal characters from temp path name
        tmp_path = pathscrub(tmp_path)

        # create if missing
        if not os.path.isdir(tmp_path):
            log.debug('creating tmp_path %s' % tmp_path)
            os.mkdir(tmp_path)

        # check for write-access
        if not os.access(tmp_path, os.W_OK):
            raise plugin.PluginError(
                'Not allowed to write to temp directory `%s`' % tmp_path)

        # download and write data into a temp file
        tmp_dir = tempfile.mkdtemp(dir=tmp_path)
        fname = hashlib.md5(url.encode('utf-8', 'replace')).hexdigest()
        datafile = os.path.join(tmp_dir, fname)
        outfile = io.open(datafile, 'wb')
        try:
            for chunk in response.iter_content(chunk_size=150 * 1024,
                                               decode_unicode=False):
                outfile.write(chunk)
        except Exception as e:
            # don't leave futile files behind
            # outfile has to be closed before we can delete it on Windows
            outfile.close()
            log.debug('Download interrupted, removing datafile')
            os.remove(datafile)
            if isinstance(e, socket.timeout):
                log.error('Timeout while downloading file')
            else:
                raise
        else:
            outfile.close()
            # Do a sanity check on downloaded file
            if os.path.getsize(datafile) == 0:
                entry.fail('File %s is 0 bytes in size' % datafile)
                os.remove(datafile)
                return
            # store temp filename into entry so other plugins may read and modify content
            # temp file is moved into final destination at self.output
            entry['file'] = datafile
            log.debug('%s field file set to: %s', entry['title'],
                      entry['file'])

        if 'content-type' in response.headers:
            entry['mime-type'] = str(
                parse_header(response.headers['content-type'])[0])
        else:
            entry['mime-type'] = "unknown/unknown"

        content_encoding = response.headers.get('content-encoding', '')
        decompress = 'gzip' in content_encoding or 'deflate' in content_encoding
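        # content-length reflects the compressed payload on the wire, which
        # would not match the decoded file's size, so skip it when compressed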
        if 'content-length' in response.headers and not decompress:
            entry['content-length'] = int(response.headers['content-length'])

        # prefer content-disposition naming, note: content-disposition can be disabled completely
        # by setting entry field `content-disposition` to False
        if entry.get('content-disposition', True):
            self.filename_from_headers(entry, response)
        else:
            log.info('Content-disposition disabled for %s', entry['title'])
        self.filename_ext_from_mime(entry)

        if not entry.get('filename'):
            filename = unquote(url.rsplit('/', 1)[1])
            log.debug('No filename - setting from url: %s', filename)
            entry['filename'] = filename
        log.debug('Finishing download_entry() with filename %s',
                  entry.get('filename'))
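
    # A quick illustration of the filename-from-url fallback above (URL is
    # hypothetical):
    #
    #     unquote('http://example.com/files/My%20Show.S01E01.mkv'.rsplit('/', 1)[1])
    #     -> 'My Show.S01E01.mkv'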
Example #45
    def __init__(self, host='localhost', port=5672, ssl=None, connect_timeout=None,
                 userid='guest', password='guest', login_method='AMQPLAIN', virtual_host='/',
                 locale='en_US',
                 channel_max=65535, frame_max=131072,
                 heartbeat=0,
                 client_properties=None,
                 on_blocked=None, on_unblocked=None):
        """Create a connection to the specified host

        If you are using SSL, make sure the correct port number is specified (usually 5671), as the
        default of 5672 is for non-SSL connections.

        You can define an AMQP connection string as the host, this will be used to set
        the `host`, `port`, `userid`, `password` and `virtual_host`. The connection string follows
        this format:

            `amqp://[userid:password@]host[:port][/virtual_host]`

        :param str host: host or amqp connection string
        :param int port: port
        :param ssl: dict of SSL options passed to :func:`ssl.wrap_socket()`, None to disable SSL
        :param float connect_timeout: connect timeout
        :param str userid: username
        :param str password: password
        :param str login_method: login method (this is server-specific); default is for RabbitMQ
        :param str virtual_host: virtual host
        :param str locale: locale
        :param int channel_max: maximum number of channels
        :param int frame_max: maximum frame payload size in bytes
        :param float heartbeat: heartbeat interval in seconds, 0 disables heartbeat
        :param client_properties: dict of client properties
        :param on_blocked: callback on connection blocked
        :param on_unblocked: callback on connection unblocked
        :type connect_timeout: float or None
        :type client_properties: dict or None
        :type ssl: dict or None
        :type on_blocked: Callable or None
        :type on_unblocked: Callable or None
        """
        log.debug('amqpy {} Connection.__init__()'.format(__version__))
        self.conn_lock = Lock()

        #: Map of `{channel_id: Channel}` for all active channels
        #:
        #: :type: dict[int, Channel]
        self.channels = {}  # dict of {channel_id int: Channel}

        # the connection object itself is treated as channel 0
        super(Connection, self).__init__(self, 0)  # also sets channels[0] = self

        # instance variables
        #: :type: amqpy.transport.Transport
        self.transport = None
        self.method_reader = None
        self.method_writer = None
        self._wait_tune_ok = None

        # properties set in the start method, after a connection is established
        self.version_major = 0
        self.version_minor = 0
        self.server_properties = {}
        self.mechanisms = []
        self.locales = []

        # properties set in the Tune method
        self.channel_max = channel_max
        self.frame_max = frame_max
        if six.PY2:
            self._avail_channel_ids = array(b'H', range(self.channel_max, 0, -1))
        else:
            self._avail_channel_ids = array('H', range(self.channel_max, 0, -1))
        self._heartbeat_final = 0  # final heartbeat interval after negotiation
        self._heartbeat_server = None

        # detect amqp connection string
        if host.startswith('amqp://'):
            parts = urlparse("http://" + host[7:])
            host = unquote(parts.hostname or '') or None
            port = parts.port or 5672
            userid = unquote(parts.username or '') or 'guest'
            password = unquote(parts.password or '') or 'guest'
            virtual_host = unquote(parts.path[1:] or '/')

        # save connection parameters
        self._host = host
        self._port = port
        self._connect_timeout = connect_timeout
        self._ssl = ssl
        self._userid = userid
        self._password = password
        self._login_method = login_method
        self._virtual_host = virtual_host
        self._locale = locale
        self._heartbeat_client = heartbeat  # original heartbeat interval value proposed by client
        self._client_properties = client_properties

        # callbacks
        self.on_blocked = on_blocked
        self.on_unblocked = on_unblocked

        # heartbeat
        self._close_event = Event()
        self._heartbeat_thread = None

        self.connect()
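
    # A minimal usage sketch of the connection-string form described in the
    # docstring (broker address and credentials are hypothetical):
    #
    #     conn = Connection(host='amqp://user:p%40ss@broker.example:5671/prod')
    #     # parsed as host='broker.example', port=5671, userid='user',
    #     # password='p@ss', virtual_host='prod'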