Code example #1
File: common.py Project: ImmPortDB/immport-galaxy
def build_request_with_data(url, data, api_key, method):
    """Build a request with the received method."""
    http_redirect_with_data_handler = HTTPRedirectWithDataHandler(method=method)
    opener = build_opener(http_redirect_with_data_handler)
    install_opener(opener)
    url = make_url(url, api_key=api_key, args=None)
    request = Request(url, headers={'Content-Type': 'application/json'}, data=json.dumps(data))
    request_method = request.get_method()
    if request_method != method:
        request.get_method = lambda: method
    return opener, request
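
Note: every example on this page relies on the same urllib trick. Request.get_method() returns GET when the request has no body and POST when it does, so rebinding it (or, on Python 3.3+, passing method=) forces an arbitrary HTTP verb. A minimal, self-contained sketch of the pattern, not taken from any of the projects listed here (the endpoint URL is illustrative only):

# Minimal sketch of the get_method override (Python 3; URL is illustrative).
import json
from urllib.request import Request

body = json.dumps({"name": "example"}).encode("utf-8")
req = Request("https://api.example.com/items/1",
              data=body,
              headers={"Content-Type": "application/json"})
# With a body present, urllib would default to POST; force PUT instead.
req.get_method = lambda: "PUT"
# On Python 3.3+ the same effect is available as Request(..., method="PUT").
# urlopen(req, timeout=10) would then issue the PUT request.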
Code example #2
File: common.py Project: ImmPortDB/immport-galaxy
 def redirect_request(self, request, fp, code, msg, headers, new_url):
     request_method = request.get_method()
     if str(code) in self.redirect_codes and request_method in self.valid_methods:
         new_url = new_url.replace(' ', '%20')
         request = Request(new_url,
                           data=request.data,
                           headers=request.headers,
                           origin_req_host=request.get_origin_req_host(),
                           unverifiable=True)
         if self.method in self.valid_methods:
             if request.get_method() != self.method:
                 request.get_method = lambda: self.method
         return request
     else:
         return HTTPRedirectHandler.redirect_request(self, request, fp, code, msg, headers, new_url)
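
A related piece of the pattern is keeping the overridden verb (and body) across redirects, which is what the handler above does. A hedged, Python 3-only sketch of a similar handler wired into an opener (the class name and the commented URL are illustrative, not from the project):

# Sketch: a redirect handler that preserves verb and body on 307/308 (Python 3).
from urllib.request import HTTPRedirectHandler, Request, build_opener

class MethodPreservingRedirectHandler(HTTPRedirectHandler):
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        if code in (307, 308):
            # Re-issue with the original method and payload instead of
            # urllib's default behaviour (which refuses or downgrades to GET).
            return Request(newurl,
                           data=req.data,
                           headers=req.headers,
                           origin_req_host=req.origin_req_host,
                           method=req.get_method(),
                           unverifiable=True)
        return super().redirect_request(req, fp, code, msg, headers, newurl)

opener = build_opener(MethodPreservingRedirectHandler())
# opener.open(Request("https://api.example.com/items/1",
#                     data=b'{"k": "v"}',
#                     headers={"Content-Type": "application/json"},
#                     method="PUT"))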
Code example #3
File: connection.py Project: sunsongxp/ParsePy
    def execute(cls, uri, http_verb, extra_headers=None, batch=False, _body=None, **kw):
        """
        if batch == False, execute a command with the given parameters and
        return the response JSON.
        If batch == True, return the dictionary that would be used in a batch
        command.
        """
        if batch:
            urlsplitter = urlparse(API_ROOT).netloc
            ret = {"method": http_verb, "path": uri.split(urlsplitter, 1)[1]}
            if kw:
                ret["body"] = kw
            return ret

        if not ('app_id' in ACCESS_KEYS and 'rest_key' in ACCESS_KEYS):
            raise core.ParseError('Missing connection credentials')

        app_id = ACCESS_KEYS.get('app_id')
        rest_key = ACCESS_KEYS.get('rest_key')
        master_key = ACCESS_KEYS.get('master_key')

        url = uri if uri.startswith(API_ROOT) else cls.ENDPOINT_ROOT + uri
        if _body is None:
            data = kw and json.dumps(kw, default=date_handler) or "{}"
        else:
            data = _body
        if http_verb == 'GET' and data:
            url += '?%s' % urlencode(kw)
            data = None
        else:
            data = data

        headers = {
            'Content-type': 'application/json',
            'X-Parse-Application-Id': app_id,
            'X-Parse-REST-API-Key': rest_key
        }
        headers.update(extra_headers or {})

        request = Request(url.encode('utf-8'), data, headers)

        if ACCESS_KEYS.get('session_token'):
            request.add_header('X-Parse-Session-Token', ACCESS_KEYS.get('session_token'))
        elif master_key:
            request.add_header('X-Parse-Master-Key', master_key)

        request.get_method = lambda: http_verb

        try:
            response = urlopen(request, timeout=CONNECTION_TIMEOUT)
        except HTTPError as e:
            exc = {
                400: core.ResourceRequestBadRequest,
                401: core.ResourceRequestLoginRequired,
                403: core.ResourceRequestForbidden,
                404: core.ResourceRequestNotFound
                }.get(e.code, core.ParseError)
            raise exc(e.read())

        return json.loads(response.read().decode('utf-8'))
Code example #4
def get_genome_space_launch_apps( atm_url, url_opener, file_url, file_type ):
    gs_request = Request( "%s/%s/webtool/descriptor" % ( atm_url, GENOMESPACE_API_VERSION_STRING ) )
    gs_request.get_method = lambda: 'GET'
    opened_gs_request = url_opener.open( gs_request )
    webtool_descriptors = json.loads( opened_gs_request.read() )
    webtools = []
    for webtool in webtool_descriptors:
        webtool_name = webtool.get( 'name' )
        base_url = webtool.get( 'baseUrl' )
        use_tool = False
        for param in webtool.get( 'fileParameters', [] ):
            for format in param.get( 'formats', [] ):
                if format.get( 'name' ) == file_type:
                    use_tool = True
                    break
            if use_tool:
                file_param_name = param.get( 'name' )
                # file_name_delimiters = param.get( 'nameDelimiters' )
                if '?' in base_url:
                    url_delimiter = "&"
                else:
                    url_delimiter = "?"
                launch_url = "%s%s%s" % ( base_url, url_delimiter, urlencode( [ ( file_param_name, file_url ) ] ) )
                webtools.append( ( launch_url, webtool_name ) )
                break
    return webtools
Code example #5
File: pagerduty.py Project: pebble/spacel-provision
    def _pd_api(self, url, data=None, method='GET'):
        url = '%s/%s' % (PD_API_BASE, url)
        request_args = {
            'headers': dict(self._pd_headers)
        }
        if six.PY3:  # pragma: no cover
            request_args['method'] = method

        if data is not None:
            request_args['data'] = json.dumps(data).encode('utf-8')
            request_args['headers']['Content-Type'] = APPLICATION_JSON

        request = Request(url, **request_args)
        if six.PY2:  # pragma: no cover
            request.get_method = lambda: method

        try:
            response = urlopen(request)
            return json.loads(response.read().decode('utf-8'))
        except HTTPError as e:
            response = e.read().decode('utf-8')
            logger.warning("API error: %s", response)
            if method == 'GET' and e.code == 404:
                return None
            else:
                raise e
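
Example #5 above branches on six.PY2 / six.PY3 because on Python 3.3+ Request accepts a method= keyword, so the get_method patch is only needed under Python 2's urllib2. A hedged, Python 3-only equivalent of that request construction (the function name, defaults, and error handling below are illustrative, not the project's code):

# Python 3 only: method= replaces the get_method patch used for Python 2.
import json
from urllib.error import HTTPError
from urllib.request import Request, urlopen

def pd_api_py3(url, data=None, method='GET', headers=None, timeout=10):
    body = json.dumps(data).encode('utf-8') if data is not None else None
    hdrs = dict(headers or {})
    if body is not None:
        hdrs['Content-Type'] = 'application/json'
    request = Request(url, data=body, headers=hdrs, method=method)
    try:
        with urlopen(request, timeout=timeout) as response:
            return json.loads(response.read().decode('utf-8'))
    except HTTPError as e:
        if method == 'GET' and e.code == 404:
            return None
        raise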
Code example #6
File: blazemeter.py Project: Yingmin-Li/taurus
    def _request(self, url, data=None, headers=None, checker=None, method=None):
        if not headers:
            headers = {}
        if self.token:
            headers["X-API-Key"] = self.token
        self.log.debug("Request: %s %s %s", method if method else 'GET', url,
                       data[:self.logger_limit] if data else None)
        # .encode("utf-8") is probably better
        data = data.encode() if isinstance(data, six.text_type) else data
        request = Request(url, data, headers)
        if method:
            request.get_method = lambda: method

        response = urlopen(request, timeout=self.timeout)

        if checker:
            checker(response)

        resp = response.read()

        if not isinstance(resp, str):
            resp = resp.decode()

        self.log.debug("Response: %s", resp[:self.logger_limit] if resp else None)
        return json.loads(resp) if len(resp) else {}
Code example #7
def set_genomespace_format_identifiers( url_opener, dm_site ):
    gs_request = Request( "%s/%s/dataformat/list" % ( dm_site, GENOMESPACE_API_VERSION_STRING ) )
    gs_request.get_method = lambda: 'GET'
    opened_gs_request = url_opener.open( gs_request )
    genomespace_formats = json.loads( opened_gs_request.read() )
    for format in genomespace_formats:
        GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT[ format['url'] ] = format['name']
Code example #8
File: common.py Project: ImmPortDB/immport-galaxy
def __del(api_key, url, data):
    """
    Do the actual DELETE
    """
    url = make_url(api_key, url)
    req = Request(url, headers={'Content-Type': 'application/json'}, data=json.dumps(data))
    req.get_method = lambda: 'DELETE'
    return json.loads(urlopen(req).read())
Code example #9
def create_directory( url_opener, directory_dict, new_dir, dm_url ):
    payload = { "isDirectory": True }
    for dir_slice in new_dir:
        if dir_slice in ( '', '/', None ):
            continue
        url = '/'.join( ( directory_dict['url'], quote( dir_slice.replace( '/', '_' ), safe='' ) ) )
        new_dir_request = Request( url, headers={ 'Content-Type': 'application/json', 'Accept': 'application/json' }, data=json.dumps( payload ) )
        new_dir_request.get_method = lambda: 'PUT'
        directory_dict = json.loads( url_opener.open( new_dir_request ).read() )
    return directory_dict
Code example #10
File: utils.py Project: reixd/python-driver
 def clear_all_queries(self, cluster_name=DEFAULT_CLUSTER):
     """
     Clear all the primed queries from a particular cluster
     :param cluster_name: cluster to clear queries from
     """
     opener = build_opener(HTTPHandler)
     request = Request("http://{0}/{1}/{2}".format(
         self.admin_addr, "prime", cluster_name))
     request.get_method = lambda: 'DELETE'
     connection = opener.open(request)
     return connection.read()
Code example #11
 def test_upload_no_boundary(self):
     with self.assertRaises(HTTPError) as handler:
         data = b""
         request = Request(self._url('top/middle/'), data=data)
         request.add_header("Content-Length", len(data))
         request.add_header("Content-Type", "multipart/form-data")
         request.get_method = lambda: "POST"
         urlopen(request)
     self.assertEqual(handler.exception.code, 400)
     self.assertEqual(handler.exception.reason,
                      "'Content-Type' header does not contain a boundary")
Code example #12
File: utils.py Project: gaozhengwei/config
def get_system(token,
               method,
               api_cmd,
               api_cmd_headers=None,
               api_cmd_payload=None,
               timeout=10):
    """
    Make a rest-api request
    Returns: response as a dictionary
    """
    LOG.debug("%s cmd:%s hdr:%s payload:%s" %
              (method, api_cmd, api_cmd_headers, api_cmd_payload))

    response = None
    try:
        request_info = Request(api_cmd)
        request_info.get_method = lambda: method
        if token:
            request_info.add_header("X-Auth-Token", token.get_id())
        request_info.add_header("Accept", "application/json")

        if api_cmd_headers is not None:
            for header_type, header_value in api_cmd_headers.items():
                request_info.add_header(header_type, header_value)

        if api_cmd_payload is not None:
            request_info.add_data(api_cmd_payload)

        request = urlopen(request_info, timeout=timeout)
        response = request.read()

        if response == "":
            response = json.loads("{}")
        else:
            response = json.loads(response)
        request.close()

    except HTTPError as e:
        if 401 == e.code:
            if token:
                token.set_expired()
        LOG.warn("HTTP Error e.code=%s e=%s" % (e.code, e))
        if hasattr(e, 'msg') and e.msg:
            response = json.loads(e.msg)
        else:
            response = json.loads("{}")
        raise

    except URLError:
        LOG.error("Cannot access %s" % api_cmd)
        raise

    finally:
        return response
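
Note that request_info.add_data(...) above is urllib2 (Python 2) API; Request.add_data() was removed in Python 3.4, where the payload is assigned to the data attribute instead. A small hedged helper showing the Python 3 equivalent (the helper name and the sample URL are ours, not from the project):

# Python 3 stand-in for urllib2's request_info.add_data(api_cmd_payload).
from urllib.request import Request

def attach_payload(request_info, api_cmd_payload):
    if api_cmd_payload is not None:
        # Request.data accepts bytes; encode str payloads first.
        request_info.data = (api_cmd_payload.encode('utf-8')
                             if isinstance(api_cmd_payload, str)
                             else api_cmd_payload)

req = Request('https://api.example.com/v1/things', method='PATCH')
attach_payload(req, '{"enabled": true}')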
Code example #13
File: utils.py Project: thelastpickle/python-driver
 def clear_all_queries(self, cluster_name=DEFAULT_CLUSTER):
     """
     Clear all the primed queries from a particular cluster
     :param cluster_name: cluster to clear queries from
     """
     opener = build_opener(HTTPHandler)
     request = Request("http://{0}/{1}/{2}".format(
         self.admin_addr, "prime", cluster_name))
     request.get_method = lambda: 'DELETE'
     connection = opener.open(request)
     return connection.read()
Code example #14
 def req(self, path, data=None, method=None):
     url = self.server + path
     if data:
         req = Request(url, headers={'Content-Type': 'application/json'}, data=json.dumps(data))
     else:
         req = Request(url, headers={'Content-Type': 'application/json'})
     if method:
         req.get_method = lambda: method
     res = self.opener.open(req)
     print('==> at %s (%s)' % (url, method or 'GET'))
     assert res.getcode() == 200, url
     return res
Code example #15
    def submit_request(self, query):
        opener = build_opener(HTTPHandler)
        data = json.dumps(query.fetch_json()).encode('utf8')

        request = Request("http://{}/{}{}".format(
            self.admin_addr, query.path, query.fetch_url_params()), data=data)
        request.get_method = lambda: query.method
        request.add_header("Content-Type", 'application/json')
        request.add_header("Content-Length", len(data))

        connection = opener.open(request)
        return connection.read().decode('utf-8')
Code example #16
File: utils.py Project: thelastpickle/python-driver
    def submit_request(self, query):
        opener = build_opener(HTTPHandler)
        data = json.dumps(query.fetch_json()).encode('utf8')

        request = Request("http://{}/{}{}".format(
            self.admin_addr, query.path, query.fetch_url_params()), data=data)
        request.get_method = lambda: 'POST'
        request.add_header("Content-Type", 'application/json')
        request.add_header("Content-Length", len(data))

        connection = opener.open(request)
        return connection.read().decode('utf-8')
Code example #17
File: connection.py Project: shawnp/ParsePy
    def execute(cls, uri, http_verb, extra_headers=None, batch=False, **kw):
        """
        if batch == False, execute a command with the given parameters and
        return the response JSON.
        If batch == True, return the dictionary that would be used in a batch
        command.
        """
        if batch:
            ret = {"method": http_verb, "path": uri.split("parse.com", 1)[1]}
            if kw:
                ret["body"] = kw
            return ret

        if not ('app_id' in ACCESS_KEYS and 'rest_key' in ACCESS_KEYS):
            raise core.ParseError('Missing connection credentials')

        app_id = ACCESS_KEYS.get('app_id')
        rest_key = ACCESS_KEYS.get('rest_key')
        master_key = ACCESS_KEYS.get('master_key')

        headers = extra_headers or {}
        url = uri if uri.startswith(API_ROOT) else cls.ENDPOINT_ROOT + uri
        data = kw and json.dumps(kw) or "{}"
        if http_verb == 'GET' and data:
            url += '?%s' % urlencode(kw)
            data = None
        else:
            data = data.encode('utf-8')

        request = Request(url, data, headers)
        request.add_header('Content-type', 'application/json')
        request.add_header('X-Parse-Application-Id', app_id)
        request.add_header('X-Parse-REST-API-Key', rest_key)

        if master_key and 'X-Parse-Session-Token' not in headers.keys():
            request.add_header('X-Parse-Master-Key', master_key)

        request.get_method = lambda: http_verb

        try:
            response = urlopen(request)
        except HTTPError as e:
            exc = {
                400: core.ResourceRequestBadRequest,
                401: core.ResourceRequestLoginRequired,
                403: core.ResourceRequestForbidden,
                404: core.ResourceRequestNotFound
                }.get(e.code, core.ParseError)
            raise exc(e.read())

        return json.loads(response.read().decode('utf-8'))
Code example #18
 def req(self, path, data=None, method=None):
     url = self.server + path
     if data:
         req = Request(url,
                       headers={'Content-Type': 'application/json'},
                       data=json.dumps(data))
     else:
         req = Request(url, headers={'Content-Type': 'application/json'})
     if method:
         req.get_method = lambda: method
     res = self.opener.open(req)
     print('==> at %s (%s)' % (url, method or 'GET'))
     assert res.getcode() == 200, url
     return res
Code example #19
def set_genomespace_format_identifiers(url_opener, dm_site):
    gs_request = Request("%s/%s/dataformat/list" %
                         (dm_site, GENOMESPACE_API_VERSION_STRING))
    gs_request.get_method = lambda: 'GET'
    opened_gs_request = url_opener.open(gs_request)
    genomespace_formats = json.loads(opened_gs_request.read())
    for format in genomespace_formats:
        GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT[
            format['url']] = format['name']
    global GENOMESPACE_FORMAT_IDENTIFIER_UNKNOWN
    GENOMESPACE_FORMAT_IDENTIFIER_UNKNOWN = dict(
        (x[1], x[0])
        for x in GENOMESPACE_FORMAT_IDENTIFIER_TO_GENOMESPACE_EXT.items()).get(
            GENOMESPACE_UNKNOWN_FORMAT_KEY,
            GENOMESPACE_FORMAT_IDENTIFIER_UNKNOWN)
Code example #20
def rest_api_request(token, method, api_cmd,
                     api_cmd_payload=None, timeout=10):
    """
    Make a rest-api request
    Returns: response as a dictionary
    """
    api_cmd_headers = dict()
    api_cmd_headers['Content-type'] = "application/json"
    api_cmd_headers['User-Agent'] = "cert-mon/1.0"

    try:
        request_info = Request(api_cmd)
        request_info.get_method = lambda: method
        if token:
            request_info.add_header("X-Auth-Token", token.get_id())
        request_info.add_header("Accept", "application/json")

        if api_cmd_headers is not None:
            for header_type, header_value in api_cmd_headers.items():
                request_info.add_header(header_type, header_value)

        if api_cmd_payload is not None:
            request_info.add_data(api_cmd_payload)

        request = None
        try:
            request = urlopen(request_info, timeout=timeout)
            response = request.read()
        finally:
            if request:
                request.close()

        if response == "":
            response = json.loads("{}")
        else:
            response = json.loads(response)

    except HTTPError as e:
        if 401 == e.code:
            if token:
                token.set_expired()
        raise

    except URLError:
        LOG.error("Cannot access %s" % api_cmd)
        raise

    return response
Code example #21
File: cdash.py Project: matzke1/spack
    def upload(self, filename):
        if not self.cdash_upload_url:
            return

        # Compute md5 checksum for the contents of this file.
        md5sum = checksum(hashlib.md5, filename, block_size=8192)

        opener = build_opener(HTTPHandler)
        with open(filename, 'rb') as f:
            url = "{0}&MD5={1}".format(self.cdash_upload_url, md5sum)
            request = Request(url, data=f)
            request.add_header('Content-Type', 'text/xml')
            request.add_header('Content-Length', os.path.getsize(filename))
            # By default, urllib2 only supports GET and POST.
            # CDash expects this file to be uploaded via PUT.
            request.get_method = lambda: 'PUT'
            url = opener.open(request)
Code example #22
File: cdash.py Project: zygyz/spack
    def upload(self, filename):
        if not self.cdash_upload_url:
            return

        # Compute md5 checksum for the contents of this file.
        md5sum = checksum(hashlib.md5, filename, block_size=8192)

        opener = build_opener(HTTPHandler)
        with open(filename, 'rb') as f:
            url = "{0}&MD5={1}".format(self.cdash_upload_url, md5sum)
            request = Request(url, data=f)
            request.add_header('Content-Type', 'text/xml')
            request.add_header('Content-Length', os.path.getsize(filename))
            # By default, urllib2 only supports GET and POST.
            # CDash expects this file to be uploaded via PUT.
            request.get_method = lambda: 'PUT'
            url = opener.open(request)
Code example #23
def create_directory(url_opener, directory_dict, new_dir, dm_url):
    payload = {"isDirectory": True}
    for dir_slice in new_dir:
        if dir_slice in ('', '/', None):
            continue
        url = '/'.join(
            (directory_dict['url'], quote(dir_slice.replace('/', '_'),
                                          safe='')))
        new_dir_request = Request(url,
                                  headers={
                                      'Content-Type': 'application/json',
                                      'Accept': 'application/json'
                                  },
                                  data=json.dumps(payload))
        new_dir_request.get_method = lambda: 'PUT'
        directory_dict = json.loads(url_opener.open(new_dir_request).read())
    return directory_dict
Code example #24
    def try_del(httpd, querystr):
        """Try DEL calls to the server."""

        num_requests["del_handler"] = 0

        opener = build_opener(HTTPHandler)
        request = Request(httpd_url(httpd, "/api/resource/1", querystr))
        request.get_method = lambda: "DEL"
        f = opener.open(request)

        assert f.getcode() == 200
        assert json.loads(f.read()) == {
            "called": 1,
            "id": "1",
            "query": querystr
        }
        assert num_requests["del_handler"] == 1
Code example #25
def populate_buildgroup(job_names, group_name, project, site, credentials,
                        cdash_url):
    url = "{0}/api/v1/buildgroup.php".format(cdash_url)

    headers = {
        'Authorization': 'Bearer {0}'.format(credentials),
        'Content-Type': 'application/json',
    }

    opener = build_opener(HTTPHandler)

    parent_group_id = _create_buildgroup(opener, headers, url, project,
                                         group_name, 'Daily')
    group_id = _create_buildgroup(opener, headers, url, project,
                                  'Latest {0}'.format(group_name), 'Latest')

    if not parent_group_id or not group_id:
        msg = 'Failed to create or retrieve buildgroups for {0}'.format(
            group_name)
        raise SpackError(msg)

    data = {
        'project':
        project,
        'buildgroupid':
        group_id,
        'dynamiclist': [{
            'match': name,
            'parentgroupid': parent_group_id,
            'site': site
        } for name in job_names]
    }

    enc_data = json.dumps(data).encode('utf-8')

    request = Request(url, data=enc_data, headers=headers)
    request.get_method = lambda: 'PUT'

    response = opener.open(request)
    response_code = response.getcode()

    if response_code != 200:
        msg = 'Error response code ({0}) in populate_buildgroup'.format(
            response_code)
        raise SpackError(msg)
Code example #26
 def recurse_directory_dict( url_opener, cur_options, url ):
     cur_directory = Request( url, headers={ 'Content-Type': 'application/json', 'Accept': 'application/json, text/plain' } )
     cur_directory.get_method = lambda: 'GET'
     # get url to upload to
     try:
         cur_directory = url_opener.open( cur_directory ).read()
     except HTTPError as e:
         log.debug( 'GenomeSpace export tool failed reading a directory "%s": %s' % ( url, e ) )
         return  # bad url, go to next
     cur_directory = json.loads( cur_directory )
     directory = cur_directory.get( 'directory', {} )
     contents = cur_directory.get( 'contents', [] )
     if directory.get( 'isDirectory', False ):
         selected = directory.get( 'path' ) == value
         cur_options.append( { 'name': directory.get( 'name' ), 'value': directory.get( 'path'), 'options': [], 'selected': selected  } )
         for sub_dir in contents:
             if sub_dir.get( 'isDirectory', False ):
                 recurse_directory_dict( url_opener, cur_options[-1]['options'], sub_dir.get( 'url' ) )
Code example #28
    def http_request(self, url, method, data="", headers=None, timeout=None):
        if url[0:7].lower() != "http://":
            url = "http://%s" % url

        if hasattr(self, 'logger') and self.logger is not None:
            self.logger.debug("Sending http request. Url: %s, Data: %s, Headers: %s" % (url, str(data), str(headers)))

        req = Request(url, data, headers)
        req.get_method = lambda: method
        # The timeout parameter in urllib2.urlopen has strange behavior, and
        # seems to raise errors when set to a number. Using an opener works however.
        opener = build_opener()
        if timeout is None:
            response = opener.open(req)
        else:
            response = opener.open(req, timeout=timeout)

        return response
Code example #29
def http_call(method, url, data=None):
    """Utility method for making HTTP requests."""
    LOG.debug("http_call(): Calling %s %s" % (method, url))
    opener = build_opener(HTTPHandler)
    if data:
        data = simplejson.dumps(data)
        LOG.debug("http_call(): With body: %s" % data)
    request = Request(url, data)
    request.add_header('Accept', 'application/json')
    if data:
        request.add_header('Content-Type', 'application/json')
    request.get_method = lambda: method
    resp = opener.open(request)
    if resp.getcode() >= 400:
        raise exceptions.RomanaException("Error in %s %s with payload %s: %s", method, url, data, resp)
    body = resp.read()
    data = simplejson.loads(body)
    return data
Code example #30
def get_directory( url_opener, dm_url, path ):
    url = dm_url
    i = None
    dir_dict = {}
    for i, sub_path in enumerate( path ):
        url = "%s/%s" % ( url, sub_path )
        dir_request = Request( url, headers={ 'Content-Type': 'application/json', 'Accept': 'application/json' } )
        dir_request.get_method = lambda: 'GET'
        try:
            dir_dict = json.loads( url_opener.open( dir_request ).read() )
        except HTTPError:
            # print "e", e, url #punting, assuming lack of permissions at this low of a level...
            continue
        break
    if i is not None:
        path = path[i + 1:]
    else:
        path = []
    return ( dir_dict, path )
Code example #31
    def try_del(self, server_port, querystr):
        self.resource_del_called = 0

        opener = build_opener(HTTPHandler)
        request = Request(
            self.get_url('/api/resource/1', server_port, querystr))
        request.get_method = lambda: 'DEL'
        f = opener.open(request)

        try:
            self.assertEqual(f.getcode(), 200)
        except AttributeError:
            pass  # python 2.4
        self.assertEqual(json.loads(f.read()), {
            'called': 1,
            'id': str(1),
            'query': querystr
        })
        self.assertEqual(self.resource_del_called, 1)
Code example #33
File: __init__.py Project: duanshuaimin/sentry
    def revoke_token(cls, token, uid):
        if not cls.REVOKE_TOKEN_URL:
            return
        url = cls.REVOKE_TOKEN_URL.format(token=token, uid=uid)
        params = cls.revoke_token_params(token, uid) or {}
        headers = cls.revoke_token_headers(token, uid) or {}
        data = None

        if cls.REVOKE_TOKEN_METHOD == 'GET':
            url = '{}?{}'.format(url, urlencode(params))
        else:
            data = urlencode(params)

        request = Request(url, data=data, headers=headers)
        if cls.REVOKE_TOKEN_URL.lower() not in ('get', 'post'):
            # Patch get_method to return the needed method
            request.get_method = lambda: cls.REVOKE_TOKEN_METHOD
        response = dsa_urlopen(request)
        return cls.process_revoke_token_response(response)
Code example #34
File: __init__.py Project: zeuskingzb/sentry
    def revoke_token(cls, token, uid):
        if not cls.REVOKE_TOKEN_URL:
            return
        url = cls.REVOKE_TOKEN_URL.format(token=token, uid=uid)
        params = cls.revoke_token_params(token, uid) or {}
        headers = cls.revoke_token_headers(token, uid) or {}
        data = None

        if cls.REVOKE_TOKEN_METHOD == "GET":
            url = u"{}?{}".format(url, urlencode(params))
        else:
            data = urlencode(params)

        request = Request(url, data=data, headers=headers)
        if cls.REVOKE_TOKEN_URL.lower() not in ("get", "post"):
            # Patch get_method to return the needed method
            request.get_method = lambda: cls.REVOKE_TOKEN_METHOD
        response = dsa_urlopen(request)
        return cls.process_revoke_token_response(response)
Code example #35
def test_nonexistent_resources(httpd_no_urlhandlers):
    # GET: Return 404 for non-existent endpoint
    with pytest.raises(HTTPError) as excinfo:
        urlopen(httpd_url(httpd_no_urlhandlers, "/api/resource/"))
    assert excinfo.value.code == 404

    # POST: POST should also return 404
    with pytest.raises(HTTPError) as excinfo:
        urlopen(httpd_url(httpd_no_urlhandlers, "/api/resource/"),
                data=json.dumps({}))
    assert excinfo.value.code == 404

    # DEL: DEL should also return 404
    opener = build_opener(HTTPHandler)
    request = Request(httpd_url(httpd_no_urlhandlers, "/api/resource/"))
    request.get_method = lambda: "DEL"

    with pytest.raises(HTTPError) as excinfo:
        opener.open(request)
    assert excinfo.value.code == 404
Code example #36
def request(url, data=None, headers=None, params=None):
    u'''Simple HTTP Client'''
    if params is not None:
        query = urlencode(params)
        url = '%s?%s' % (url, query)
    req = Request(url, headers=headers)
    if data is not None:
        req.add_data(data)
    try:
        logging.debug("%s %s", req.get_method(), url)
        res = urlopen(req)
        return json.loads(res.read())
    except HTTPError as err:
        logging.error("%s. Client error GET %s with status %d.", err.reason,
                      url, err.code)
    except URLError as err:
        logging.exception(err)
    except (ValueError, TypeError) as err:
        logging.error(err)
    return None
Code example #37
def wait_for_spark_workers(num_of_expected_workers, timeout):
    """
    This queries the spark master and checks for the expected number of workers
    """
    start_time = time.time()
    while True:
        opener = build_opener(HTTPHandler)
        request = Request("http://{0}:7080".format(CASSANDRA_IP))
        request.get_method = lambda: 'GET'
        connection = opener.open(request)
        match = re.search(r'Alive Workers:.*(\d+)</li>', connection.read().decode('utf-8'))
        num_workers = int(match.group(1))
        if num_workers == num_of_expected_workers:
            match = True
            break
        elif time.time() - start_time > timeout:
            match = True
            break
        time.sleep(1)
    return match
Code example #38
    def test_nonexistent_resources(self):
        # Create a server with a placeholder handler so we don't fall back
        # to serving local files
        httpd = mozhttpd.MozHttpd(port=0)
        httpd.start(block=False)
        server_port = httpd.httpd.server_port

        # GET: Return 404 for non-existent endpoint
        exception_thrown = False
        try:
            urlopen(self.get_url('/api/resource/', server_port, None))
        except HTTPError as e:
            self.assertEqual(e.code, 404)
            exception_thrown = True
        self.assertTrue(exception_thrown)

        # POST: POST should also return 404
        exception_thrown = False
        try:
            urlopen(
                self.get_url('/api/resource/', server_port, None),
                data=json.dumps({}),
            )
        except HTTPError as e:
            self.assertEqual(e.code, 404)
            exception_thrown = True
        self.assertTrue(exception_thrown)

        # DEL: DEL should also return 404
        exception_thrown = False
        try:
            opener = build_opener(HTTPHandler)
            request = Request(self.get_url('/api/resource/', server_port,
                                           None))
            request.get_method = lambda: 'DEL'
            opener.open(request)
        except HTTPError as e:
            self.assertEqual(e.code, 404)
            exception_thrown = True
        self.assertTrue(exception_thrown)
Code example #39
    def invoke_storlet_on_copy_dest(self):
        # No COPY in swiftclient. Using urllib instead...
        url = '%s/%s/%s' % (self.url, self.container, self.storlet_file)
        headers = {
            'X-Auth-Token': self.token,
            'X-Run-Storlet': self.storlet_name,
            'X-Object-Meta-Name': 'thumbnail',
            'Destination': '%s/gen_thumb_on_copy_.jpg' % self.container
        }
        headers.update(self.additional_headers)
        req = Request(url, headers=headers)
        req.get_method = lambda: 'COPY'
        conn = urlopen(req, timeout=10)
        status = conn.getcode()
        self.assertIn(status, [201, 202])

        headers = c.head_object(self.url, self.token, self.container,
                                'gen_thumb_on_copy_.jpg')
        self.assertLess(int(headers['content-length']), 1087318)
        self.assertEqual('thumbnail', headers['x-object-meta-name'])
        self.assertTrue('x-object-meta-x-timestamp' not in headers)
        self.assertTrue('x-timestamp' in headers)
Code example #40
File: cdash.py Project: vmiheer/spack
    def upload(self, filename):
        if not self.cdash_upload_url:
            return

        # Compute md5 checksum for the contents of this file.
        md5sum = checksum(hashlib.md5, filename, block_size=8192)

        opener = build_opener(HTTPHandler)
        with open(filename, 'rb') as f:
            params_dict = {
                'build': self.buildname,
                'site': self.site,
                'stamp': self.buildstamp,
                'MD5': md5sum,
            }
            encoded_params = urlencode(params_dict)
            url = "{0}&{1}".format(self.cdash_upload_url, encoded_params)
            request = Request(url, data=f)
            request.add_header('Content-Type', 'text/xml')
            request.add_header('Content-Length', os.path.getsize(filename))
            if self.authtoken:
                request.add_header('Authorization',
                                   'Bearer {0}'.format(self.authtoken))
            try:
                # By default, urllib2 only supports GET and POST.
                # CDash expects this file to be uploaded via PUT.
                request.get_method = lambda: 'PUT'
                response = opener.open(request)
                if self.current_package_name not in self.buildIds:
                    resp_value = response.read()
                    if isinstance(resp_value, bytes):
                        resp_value = resp_value.decode('utf-8')
                    match = self.buildid_regexp.search(resp_value)
                    if match:
                        buildid = match.group(1)
                        self.buildIds[self.current_package_name] = buildid
            except Exception as e:
                print("Upload to CDash failed: {0}".format(e))
Code example #41
def download_from_genomespace_file_browser( json_parameter_file, genomespace_site, gs_toolname ):
    json_params = json.loads( open( json_parameter_file, 'r' ).read() )
    datasource_params = json_params.get( 'param_dict' )
    username = datasource_params.get( "gs-username", None )
    token = datasource_params.get( "gs-token", None )
    assert None not in [ username, token ], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get( "output", None )
    dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener( username, token, gs_toolname=gs_toolname )
    # load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] )

    file_url_prefix = "fileUrl"
    file_type_prefix = "fileFormat"
    metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )

    # setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir=json_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config=json_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )

    file_numbers = []
    for name in datasource_params.keys():
        if name.startswith( file_url_prefix ):
            name = name[len( file_url_prefix ):]
            file_numbers.append( int( name ) )
    if not file_numbers:
        if output_filename:
            open( output_filename, 'wb' )  # erase contents of file
        raise Exception( "You must select at least one file to import into Galaxy." )
    file_numbers.sort()
    used_filenames = []
    for file_num in file_numbers:
        url_key = "%s%i" % ( file_url_prefix, file_num )
        download_url = datasource_params.get( url_key, None )
        if download_url is None:
            break
        filetype_key = "%s%i" % ( file_type_prefix, file_num )
        filetype_url = datasource_params.get( filetype_key, None )
        galaxy_ext = get_galaxy_ext_from_genomespace_format_url( url_opener, filetype_url )
        formatted_download_url = "%s?%s" % ( download_url, urlencode( [ ( 'dataformat', filetype_url ) ] ) )
        new_file_request = Request( formatted_download_url )
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open( new_file_request )
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            # If the response has Content-Disposition, try to get filename from it
            content_disposition = dict( x.strip().split('=') if '=' in x else ( x.strip(), '' ) for x in target_download_url.info()['Content-Disposition'].split( ';' ) )
            if 'filename' in content_disposition:
                filename = content_disposition[ 'filename' ].strip( "\"'" )
        if not filename:
            parsed_url = urlparse( download_url )
            filename = unquote_plus( parsed_url[2].split( '/' )[-1] )
        if not filename:
            filename = download_url
        metadata_dict = None
        original_filename = filename
        if output_filename is None:
            filename = ''.join( c in FILENAME_VALID_CHARS and c or '-' for c in filename )
            while filename in used_filenames:
                filename = "-%s" % filename
            used_filenames.append( filename )
            output_filename = os.path.join( os.getcwd(), 'primary_%i_%s_visible_%s' % ( hda_id, filename, galaxy_ext ) )

            metadata_dict = dict( type='new_primary_dataset',
                                  base_dataset_id=dataset_id,
                                  ext=galaxy_ext,
                                  filename=output_filename,
                                  name="GenomeSpace import on %s" % ( original_filename ) )
        else:
            if dataset_id is not None:
                metadata_dict = dict( type='dataset',
                                      dataset_id=dataset_id,
                                      ext=galaxy_ext,
                                      name="GenomeSpace import on %s" % ( filename ) )
        output_file = open( output_filename, 'wb' )
        chunk_write( target_download_url, output_file )
        output_file.close()

        if ( galaxy_ext == AUTO_GALAXY_EXT or filetype_url == GENOMESPACE_FORMAT_IDENTIFIER_UNKNOWN ) and metadata_dict:
            # try to sniff datatype
            try:
                galaxy_ext = sniff.handle_uploaded_dataset_file( output_filename, datatypes_registry )
            except:
                # sniff failed
                galaxy_ext = original_filename.rsplit( '.', 1 )[-1]
                if galaxy_ext not in datatypes_registry.datatypes_by_extension:
                    galaxy_ext = DEFAULT_GALAXY_EXT
            metadata_dict[ 'ext' ] = galaxy_ext

        output_filename = None  # only have one filename available

        # write out metadata info
        if metadata_dict:
            metadata_parameter_file.write( "%s\n" % json.dumps( metadata_dict ) )

    metadata_parameter_file.close()
    return True
Code example #42
def send_file_to_genomespace( genomespace_site, username, token, source_filename, target_directory, target_filename, file_type, content_type, log_filename, gs_toolname ):
    target_filename = target_filename.replace( '/', '-' )  # Slashes no longer allowed in filenames
    url_opener = get_cookie_opener( username, token, gs_toolname=gs_toolname )
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    dm_url = genomespace_site_dict['dmServer']
    # get default directory
    if target_directory and target_directory[0] == '/':
        directory_dict, target_directory = get_directory( url_opener, dm_url, [ "%s/%s/%s" % ( GENOMESPACE_API_VERSION_STRING, 'file', target_directory[1] ) ] + target_directory[2:] )
        directory_dict = directory_dict['directory']
    else:
        directory_dict = get_personal_directory( url_opener, dm_url )['directory']  # this is the base for the auto-generated galaxy export directories
    # what directory to stuff this in
    target_directory_dict = create_directory( url_opener, directory_dict, target_directory, dm_url )
    content_length = os.path.getsize( source_filename )
    input_file = open( source_filename, 'rb' )
    if content_length > TARGET_SIMPLE_PUT_UPLOAD_SIZE:
        # Determine sizes of each part.
        split_count = content_length / TARGET_SPLIT_SIZE
        last_size = content_length - ( split_count * TARGET_SPLIT_SIZE )
        sizes = [ TARGET_SPLIT_SIZE ] * split_count
        if last_size:
            if last_size < MIN_MULTIPART_UPLOAD_SIZE:
                if sizes:
                    sizes[-1] = sizes[-1] + last_size
                else:
                    sizes = [ last_size ]
            else:
                sizes.append( last_size )
        print("Performing multi-part upload in %i parts." % ( len( sizes ) ))
        # get upload url
        upload_url = "uploadinfo"
        upload_url = "%s/%s/%s%s/%s" % ( dm_url, GENOMESPACE_API_VERSION_STRING, upload_url, target_directory_dict['path'], quote( target_filename, safe='' ) )
        upload_request = Request( upload_url, headers={ 'Content-Type': 'application/json', 'Accept': 'application/json' } )
        upload_request.get_method = lambda: 'GET'
        upload_info = json.loads( url_opener.open( upload_request ).read() )
        conn = S3Connection( aws_access_key_id=upload_info['amazonCredentials']['accessKey'],
                             aws_secret_access_key=upload_info['amazonCredentials']['secretKey'],
                             security_token=upload_info['amazonCredentials']['sessionToken'] )
        # Cannot use conn.get_bucket due to permissions, manually create bucket object
        bucket = boto.s3.bucket.Bucket( connection=conn, name=upload_info['s3BucketName'] )
        mp = bucket.initiate_multipart_upload( upload_info['s3ObjectKey'] )
        for i, part_size in enumerate( sizes, start=1 ):
            fh = tempfile.TemporaryFile( 'wb+' )
            while part_size:
                if CHUNK_SIZE > part_size:
                    read_size = part_size
                else:
                    read_size = CHUNK_SIZE
                chunk = input_file.read( read_size )
                fh.write( chunk )
                part_size = part_size - read_size
            fh.flush()
            fh.seek(0)
            mp.upload_part_from_file( fh, i )
            fh.close()
        upload_result = mp.complete_upload()
    else:
        print('Performing simple put upload.')
        upload_url = "uploadurl"
        content_md5 = hashlib.md5()
        chunk_write( input_file, content_md5, target_method="update" )
        input_file.seek( 0 )  # back to start, for uploading

        upload_params = { 'Content-Length': content_length, 'Content-MD5': base64.standard_b64encode( content_md5.digest() ), 'Content-Type': content_type }
        upload_url = "%s/%s/%s%s/%s?%s" % ( dm_url, GENOMESPACE_API_VERSION_STRING, upload_url, target_directory_dict['path'], quote( target_filename, safe='' ), urlencode( upload_params ) )
        new_file_request = Request( upload_url )  # , headers = { 'Content-Type': 'application/json', 'Accept': 'application/text' } ) #apparently http://www.genomespace.org/team/specs/updated-dm-rest-api:"Every HTTP request to the Data Manager should include the Accept header with a preference for the media types application/json and application/text." is not correct
        new_file_request.get_method = lambda: 'GET'
        # get url to upload to
        target_upload_url = url_opener.open( new_file_request ).read()
        # upload file to determined url
        upload_headers = dict( upload_params )
        # upload_headers[ 'x-amz-meta-md5-hash' ] = content_md5.hexdigest()
        upload_headers[ 'Accept' ] = 'application/json'
        upload_file_request = Request( target_upload_url, headers=upload_headers, data=input_file )
        upload_file_request.get_method = lambda: 'PUT'
        upload_result = urlopen( upload_file_request ).read()
    result_url = "%s/%s" % ( target_directory_dict['url'], quote( target_filename, safe='' ) )
    # determine available gs launch apps
    web_tools = get_genome_space_launch_apps( genomespace_site_dict['atmServer'], url_opener, result_url, file_type )
    if log_filename:
        log_file = open( log_filename, 'wb' )
        log_file.write( "<html><head><title>File uploaded to GenomeSpace from Galaxy</title></head><body>\n" )
        log_file.write( '<p>Uploaded <a href="%s">%s/%s</a> to GenomeSpace.</p>\n' % ( result_url, target_directory_dict['path'], target_filename ) )
        if web_tools:
            log_file.write( "<p>You may open this file directly in the following applications:</p>\n" )
            log_file.write( '<p><ul>\n' )
            for web_tool in web_tools:
                log_file.write( '<li><a href="%s">%s</a></li>\n' % ( web_tool ) )
            log_file.write( '</ul></p>\n' )
        else:
            log_file.write( '<p>There are no GenomeSpace applications available for file type: %s</p>\n' % ( file_type ) )
        log_file.write( "</body></html>\n" )
    return upload_result
Code example #43
def builtwith(url, headers=None, html=None, user_agent='builtwith'):
    """Detect the technology used to build a website

    FIXME: test data (maybe compare against node wappalyzer-cli)?
    """
    techs = {}

    # check URL
    for app_name, app_spec in data['apps'].items():
        if 'url' in app_spec:
            if contains(url, app_spec['url']):
                add_app(techs, app_name, app_spec)

    # download content
    if None in (headers, html):
        try:
            request = Request(url, None, {'User-Agent': user_agent})
            if html:
                # already have HTML so just need to make HEAD request for headers
                request.get_method = lambda : 'HEAD'
            response = urlopen(request)
            if headers is None:
                headers = response.headers
            if html is None:
                html = response.read().decode('utf-8')
        except Exception as e:
            print('Error:', e)
            request = None

    # check headers
    if headers:
        for app_name, app_spec in data['apps'].items():
            if 'headers' in app_spec:
                if contains_dict(headers, app_spec['headers']):
                    add_app(techs, app_name, app_spec)

    # check html
    if html:
        # node version only looks in script tag itself
        script_tags = RE_SCRIPTS.findall(html) + RE_LINKS.findall(html)

        for app_name, app_spec in data['apps'].items():
            for s_tag in script_tags:
                snippets = app_spec.get('script', [])
                if not isinstance(snippets, list):
                    snippets = [snippets]
                for snippet in snippets:
                    if contains(s_tag, snippet):
                        add_app(techs, app_name, app_spec)
                        break

            snippets = app_spec.get('html', [])
            if not isinstance(snippets, list):
                snippets = [snippets]
            for snippet in snippets:
                if contains(html, snippet):
                    add_app(techs, app_name, app_spec)
                    break

        # check meta
        # XXX add proper meta data parsing
        metas = dict(RE_META.findall(html))
        for app_name, app_spec in data['apps'].items():
            for name, content in app_spec.get('meta', {}).items():
                if name in metas:
                    if contains(metas[name], content):
                        add_app(techs, app_name, app_spec)
                        break

    return techs
Code example #44
File: connection.py Project: the0s/ParsePy
    def execute(cls, uri, http_verb, extra_headers=None, batch=False, _body=None, **kw):
        """
        if batch == False, execute a command with the given parameters and
        return the response JSON.
        If batch == True, return the dictionary that would be used in a batch
        command.
        """
        if batch:
            urlsplitter = urlparse(API_ROOT).netloc
            ret = {"method": http_verb, "path": uri.split(urlsplitter, 1)[1]}
            if kw:
                ret["body"] = kw
            return ret

        if not ('app_id' in ACCESS_KEYS and 'rest_key' in ACCESS_KEYS):
            raise core.ParseError('Missing connection credentials')

        app_id = ACCESS_KEYS.get('app_id')
        rest_key = ACCESS_KEYS.get('rest_key')
        master_key = ACCESS_KEYS.get('master_key')

        url = uri if uri.startswith(API_ROOT) else cls.ENDPOINT_ROOT + uri
        if _body is None:
            data = kw and json.dumps(kw, default=date_handler) or "{}"
        else:
            data = _body
        if http_verb == 'GET' and data:
            url += '?%s' % urlencode(kw)
            data = None
        else:
            if cls.__name__ == 'File':
                data = data
            else:
                data = data.encode('utf-8')

        headers = {
            'Content-type': 'application/json',
            'X-Parse-Application-Id': app_id,
            'X-Parse-REST-API-Key': rest_key
        }
        headers.update(extra_headers or {})

        if cls.__name__ == 'File':
            #request = Request(url.encode('utf-8'), data, headers)
            request = Request(url, data, headers)
        else:
            request = Request(url, data, headers)

        if ACCESS_KEYS.get('session_token'):
            request.add_header('X-Parse-Session-Token', ACCESS_KEYS.get('session_token'))
        elif master_key:
            request.add_header('X-Parse-Master-Key', master_key)

        request.get_method = lambda: http_verb

        try:
            response = urlopen(request, timeout=CONNECTION_TIMEOUT)
        except HTTPError as e:
            exc = {
                400: core.ResourceRequestBadRequest,
                401: core.ResourceRequestLoginRequired,
                403: core.ResourceRequestForbidden,
                404: core.ResourceRequestNotFound
                }.get(e.code, core.ParseError)
            raise exc(e.read())

        return json.loads(response.read().decode('utf-8'))
Code example #45
File: web.py Project: matzke1/spack
def _spider(url, visited, root, depth, max_depth, raise_on_error):
    """Fetches URL and any pages it links to up to max_depth.

       depth should initially be zero, and max_depth is the max depth of
       links to follow from the root.

       Prints out a warning only if the root can't be fetched; it ignores
       errors with pages that the root links to.

       Returns a tuple of:
       - pages: dict of pages visited (URL) mapped to their full text.
       - links: set of links encountered while visiting the pages.
    """
    pages = {}     # dict from page URL -> text content.
    links = set()  # set of all links seen on visited pages.

    # root may end with index.html -- chop that off.
    if root.endswith('/index.html'):
        root = re.sub('/index.html$', '', root)

    try:
        context = None
        verify_ssl = spack.config.get('config:verify_ssl')
        pyver = sys.version_info
        if (pyver < (2, 7, 9) or (3,) < pyver < (3, 4, 3)):
            if verify_ssl:
                tty.warn("Spack will not check SSL certificates. You need to "
                         "update your Python to enable certificate "
                         "verification.")
        elif verify_ssl:
            # We explicitly create default context to avoid error described in
            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
            context = ssl.create_default_context()
        else:
            context = ssl._create_unverified_context()

        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req = Request(url)
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        if "Content-type" not in resp.headers:
            tty.debug("ignoring page " + url)
            return pages, links

        if not resp.headers["Content-type"].startswith('text/html'):
            tty.debug("ignoring page " + url + " with content type " +
                      resp.headers["Content-type"])
            return pages, links

        # Do the real GET request when we know it's just HTML.
        req.get_method = lambda: "GET"
        response = _urlopen(req, timeout=_timeout, context=context)
        response_url = response.geturl()

        # Read the page and stick it in the map we'll return
        page = response.read().decode('utf-8')
        pages[response_url] = page

        # Parse out the links in the page
        link_parser = LinkParser()
        subcalls = []
        link_parser.feed(page)

        while link_parser.links:
            raw_link = link_parser.links.pop()
            abs_link = urljoin(response_url, raw_link.strip())

            links.add(abs_link)

            # Skip stuff that looks like an archive
            if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
                continue

            # Skip things outside the root directory
            if not abs_link.startswith(root):
                continue

            # Skip already-visited links
            if abs_link in visited:
                continue

            # If we're not at max depth, follow links.
            if depth < max_depth:
                subcalls.append((abs_link, visited, root,
                                 depth + 1, max_depth, raise_on_error))
                visited.add(abs_link)

        if subcalls:
            pool = NonDaemonPool(processes=len(subcalls))
            try:
                results = pool.map(_spider_wrapper, subcalls)

                for sub_pages, sub_links in results:
                    pages.update(sub_pages)
                    links.update(sub_links)

            finally:
                pool.terminate()
                pool.join()

    except URLError as e:
        tty.debug(e)

        if hasattr(e, 'reason') and isinstance(e.reason, ssl.SSLError):
            tty.warn("Spack was unable to fetch url list due to a certificate "
                     "verification problem. You can try running spack -k, "
                     "which will not check SSL certificates. Use this at your "
                     "own risk.")

        if raise_on_error:
            raise NoNetworkConnectionError(str(e), url)

    except HTMLParseError as e:
        # This error indicates that Python's HTML parser sucks.
        msg = "Got an error parsing HTML."

        # Pre-2.7.3 Pythons in particular have rather prickly HTML parsing.
        if sys.version_info[:3] < (2, 7, 3):
            msg += " Use Python 2.7.3 or newer for better HTML parsing."

        tty.warn(msg, url, "HTMLParseError: " + str(e))

    except Exception as e:
        # Other types of errors are completely ignored, except in debug mode.
        tty.debug("Error in _spider: %s:%s" % (type(e), e),
                  traceback.format_exc())

    return pages, links
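
The HEAD-before-GET check in the example above is what keeps the spider from downloading tarballs just to learn their content type. Below is a minimal, self-contained sketch of that pattern with plain urllib; the function name, timeout and context handling are illustrative assumptions, not part of the Spack code.

import ssl
from urllib.request import Request, urlopen

def fetch_if_html(url, timeout=10):
    """Return the page text if the server reports text/html, else None."""
    context = ssl.create_default_context()

    # Cheap HEAD request first, so large binaries are never downloaded.
    head = Request(url, method="HEAD")
    with urlopen(head, timeout=timeout, context=context) as resp:
        content_type = resp.headers.get("Content-type", "")
    if not content_type.startswith("text/html"):
        return None

    # Only now pay for the full GET.
    with urlopen(Request(url, method="GET"), timeout=timeout, context=context) as resp:
        return resp.read().decode("utf-8")
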
Code example #46
0
def download_from_genomespace_importer(username, token, json_parameter_file,
                                       genomespace_site, gs_toolname):
    json_params = json.loads(open(json_parameter_file, 'r').read())
    datasource_params = json_params.get('param_dict')
    assert None not in [username,
                        token], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get("output_file1", None)
    dataset_id = base_dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener(username, token, gs_toolname=gs_toolname)
    # load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[genomespace_site]
    set_genomespace_format_identifiers(url_opener,
                                       genomespace_site_dict['dmServer'])
    file_url_name = "URL"
    metadata_parameter_file = open(
        json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb')
    # setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=json_params['job_config']['GALAXY_ROOT_DIR'],
        config=json_params['job_config']['GALAXY_DATATYPES_CONF_FILE'])
    url_param = datasource_params.get(file_url_name, None)
    used_filenames = []
    for download_url in url_param.split(','):
        using_temp_file = False
        parsed_url = urlparse(download_url)
        query_params = parse_qs(parsed_url[4])
        # write file to disk
        new_file_request = Request(download_url)
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open(new_file_request)
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            content_disposition = dict(
                x.strip().split('=') if '=' in x else (x.strip(), '') for x in
                target_download_url.info()['Content-Disposition'].split(';'))
            if 'filename' in content_disposition:
                filename = content_disposition['filename'].strip("\"'")
        if not filename:
            parsed_url = urlparse(download_url)
            query_params = parse_qs(parsed_url[4])
            filename = unquote_plus(parsed_url[2].split('/')[-1])
        if not filename:
            filename = download_url
        if output_filename is None:
            # need to use a temp file here, because we do not know the ext yet
            using_temp_file = True
            output_filename = tempfile.NamedTemporaryFile(
                prefix='tmp-genomespace-importer-').name
        output_file = open(output_filename, 'wb')
        chunk_write(target_download_url, output_file)
        output_file.close()

        # determine file format
        file_type = None
        if 'dataformat' in query_params:  # this is a converted dataset
            file_type = query_params['dataformat'][0]
            file_type = get_galaxy_ext_from_genomespace_format_url(
                url_opener, file_type)
        else:
            try:
                # get and use GSMetadata object
                download_file_path = download_url.split(
                    "%s/file/" % (genomespace_site_dict['dmServer']), 1
                )[-1]  # FIXME: This is a very bad way to get the path for determining metadata. There needs to be a way to query the API using the download URL to get to the metadata object.
                metadata_request = Request(
                    "%s/%s/filemetadata/%s" %
                    (genomespace_site_dict['dmServer'],
                     GENOMESPACE_API_VERSION_STRING, download_file_path))
                metadata_request.get_method = lambda: 'GET'
                metadata_url = url_opener.open(metadata_request)
                file_metadata_dict = json.loads(metadata_url.read())
                metadata_url.close()
                file_type = file_metadata_dict.get('dataFormat', None)
                if file_type and file_type.get('url'):
                    file_type = file_type.get('url')
                    file_type = get_galaxy_ext_from_genomespace_format_url(
                        url_opener, file_type, default=None)
            except Exception:
                pass
        if file_type is None:
            # try to sniff datatype
            try:
                file_type = sniff.handle_uploaded_dataset_file(
                    output_filename, datatypes_registry)
            except Exception:
                pass  # sniff failed
        if file_type is None and '.' in parsed_url[2]:
            # still no known datatype, fall back to using extension
            file_type = parsed_url[2].rsplit('.', 1)[-1]
            file_type = GENOMESPACE_EXT_TO_GALAXY_EXT.get(file_type, file_type)
        if file_type is None:
            # use default extension (e.g. 'data')
            file_type = DEFAULT_GALAXY_EXT

        # save json info for single primary dataset
        if dataset_id is not None:
            metadata_parameter_file.write("%s\n" % json.dumps(
                dict(type='dataset',
                     dataset_id=dataset_id,
                     ext=file_type,
                     name="GenomeSpace importer on %s" % (filename))))
        # if using tmp file, move the file to the new file path dir to get scooped up later
        if using_temp_file:
            original_filename = filename
            filename = ''.join(c in FILENAME_VALID_CHARS and c or '-'
                               for c in filename)
            while filename in used_filenames:
                filename = "-%s" % filename
            used_filenames.append(filename)
            target_output_filename = os.path.join(
                os.getcwd(),
                'primary_%i_%s_visible_%s' % (hda_id, filename, file_type))
            shutil.move(output_filename, target_output_filename)
            metadata_parameter_file.write("%s\n" % json.dumps(
                dict(type='new_primary_dataset',
                     base_dataset_id=base_dataset_id,
                     ext=file_type,
                     filename=target_output_filename,
                     name="GenomeSpace importer on %s" % (original_filename))))
        dataset_id = None  # only one primary dataset available
        output_filename = None  # only have one filename available
    metadata_parameter_file.close()
    return True
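
The filename resolution in this importer cascades from the Content-Disposition header, to the last segment of the URL path, to the URL itself. Here is a compact, self-contained sketch of that cascade; the helper name and the example values are made up for illustration.

from urllib.parse import unquote_plus, urlparse

def pick_filename(download_url, content_disposition=None):
    # 1) Prefer an explicit filename from Content-Disposition.
    if content_disposition:
        parts = dict(
            p.strip().split('=', 1) if '=' in p else (p.strip(), '')
            for p in content_disposition.split(';'))
        if parts.get('filename'):
            return parts['filename'].strip('"\'')
    # 2) Fall back to the last path segment of the URL.
    candidate = unquote_plus(urlparse(download_url).path.split('/')[-1])
    # 3) Last resort: use the URL itself.
    return candidate or download_url

# pick_filename('https://dm.example.org/file/data.vcf?dataformat=x',
#               'attachment; filename="sample.vcf"')  ->  'sample.vcf'
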
Code example #47
0
def rest_api_request(token,
                     method,
                     api_cmd,
                     api_cmd_headers=None,
                     api_cmd_payload=None,
                     timeout=10):
    """
    Make a rest-api request
    Returns: response as a dictionary
    """

    # signal.signal(signal.SIGALRM, _timeout_handler)
    # if hasattr(signal, 'SIGALRM'):
    #     signal.alarm(timeout)

    LOG.info("%s cmd:%s hdr:%s payload:%s" %
             (method, api_cmd, api_cmd_headers, api_cmd_payload))

    response = None
    try:
        request_info = Request(api_cmd)
        request_info.get_method = lambda: method
        if token:
            request_info.add_header("X-Auth-Token", token.get_id())
        request_info.add_header("Accept", "application/json")

        if api_cmd_headers is not None:
            for header_type, header_value in api_cmd_headers.items():
                request_info.add_header(header_type, header_value)

        if api_cmd_payload is not None:
            request_info.add_data(api_cmd_payload)

        request = urlopen(request_info, timeout=timeout)
        response = request.read()

        if response == "":
            response = json.loads("{}")
        else:
            response = json.loads(response)
        request.close()

        LOG.info("Response=%s" % response)

    except HTTPError as e:
        if 401 == e.code:
            if token:
                token.set_expired()
        LOG.warn("HTTP Error e.code=%s e=%s" % (e.code, e))
        if hasattr(e, 'msg') and e.msg:
            response = json.loads(e.msg)
        else:
            response = json.loads("{}")

        LOG.info("HTTPError response=%s" % (response))
        raise OpenStackRestAPIException(e.message, e.code, "%s" % e)

    except URLError as e:
        LOG.warn("URLError Error e=%s" % (e))
        raise OpenStackException(e.message, "%s" % e)

    except si_exception.SysInvSignalTimeout as e:
        LOG.warn("Timeout Error e=%s" % (e))
        raise OpenStackException(e.message, "%s" % e)

    finally:
        signal.alarm(0)
        return response
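
urllib's Request normally picks GET or POST based on whether a body is present; the get_method lambda above is the usual trick for forcing another verb. A minimal, hedged sketch of that pattern follows (URL and payload are placeholders, not a real endpoint; on Python 3.3+ the method= keyword argument does the same job).

import json
from urllib.request import Request, urlopen

def json_request(url, method, payload=None, headers=None, timeout=10):
    data = json.dumps(payload).encode('utf-8') if payload is not None else None
    req = Request(url, data=data, headers=headers or {})
    req.add_header('Accept', 'application/json')
    req.get_method = lambda: method  # force PUT/PATCH/DELETE/... instead of GET/POST
    with urlopen(req, timeout=timeout) as resp:
        body = resp.read().decode('utf-8')
    return json.loads(body) if body else {}

# json_request('http://localhost:8080/v1/items/1', 'DELETE')  # hypothetical endpoint
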
Code example #48
0
def _spider(url, visited, root, depth, max_depth, raise_on_error):
    """Fetches URL and any pages it links to up to max_depth.

       depth should initially be zero, and max_depth is the max depth of
       links to follow from the root.

       Prints out a warning only if the root can't be fetched; it ignores
       errors with pages that the root links to.

       Returns a tuple of:
       - pages: dict of pages visited (URL) mapped to their full text.
       - links: set of links encountered while visiting the pages.
    """
    pages = {}  # dict from page URL -> text content.
    links = set()  # set of all links seen on visited pages.

    # root may end with index.html -- chop that off.
    if root.endswith('/index.html'):
        root = re.sub('/index.html$', '', root)

    try:
        context = None
        if sys.version_info < (2, 7, 9) or \
                ((3,) < sys.version_info < (3, 4, 3)):
            if not spack.insecure:
                tty.warn("Spack will not check SSL certificates. You need to "
                         "update your Python to enable certificate "
                         "verification.")
        else:
            # We explicitly create default context to avoid error described in
            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
            context = ssl._create_unverified_context() \
                if spack.insecure \
                else ssl.create_default_context()

        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req = Request(url)
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        if "Content-type" not in resp.headers:
            tty.debug("ignoring page " + url)
            return pages, links

        if not resp.headers["Content-type"].startswith('text/html'):
            tty.debug("ignoring page " + url + " with content type " +
                      resp.headers["Content-type"])
            return pages, links

        # Do the real GET request when we know it's just HTML.
        req.get_method = lambda: "GET"
        response = _urlopen(req, timeout=_timeout, context=context)
        response_url = response.geturl()

        # Read the page and stick it in the map we'll return
        page = response.read().decode('utf-8')
        pages[response_url] = page

        # Parse out the links in the page
        link_parser = LinkParser()
        subcalls = []
        link_parser.feed(page)

        while link_parser.links:
            raw_link = link_parser.links.pop()
            abs_link = urljoin(response_url, raw_link.strip())

            links.add(abs_link)

            # Skip stuff that looks like an archive
            if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
                continue

            # Skip things outside the root directory
            if not abs_link.startswith(root):
                continue

            # Skip already-visited links
            if abs_link in visited:
                continue

            # If we're not at max depth, follow links.
            if depth < max_depth:
                subcalls.append((abs_link, visited, root, depth + 1, max_depth,
                                 raise_on_error))
                visited.add(abs_link)

        if subcalls:
            pool = NonDaemonPool(processes=len(subcalls))
            try:
                results = pool.map(_spider_wrapper, subcalls)

                for sub_pages, sub_links in results:
                    pages.update(sub_pages)
                    links.update(sub_links)

            finally:
                pool.terminate()
                pool.join()

    except URLError as e:
        tty.debug(e)

        if isinstance(e.reason, ssl.SSLError):
            tty.warn("Spack was unable to fetch url list due to a certificate "
                     "verification problem. You can try running spack -k, "
                     "which will not check SSL certificates. Use this at your "
                     "own risk.")

        if raise_on_error:
            raise NoNetworkConnectionError(str(e), url)

    except HTMLParseError as e:
        # This error indicates that Python's HTML parser sucks.
        msg = "Got an error parsing HTML."

        # Pre-2.7.3 Pythons in particular have rather prickly HTML parsing.
        if sys.version_info[:3] < (2, 7, 3):
            msg += " Use Python 2.7.3 or newer for better HTML parsing."

        tty.warn(msg, url, "HTMLParseError: " + str(e))

    except Exception as e:
        # Other types of errors are completely ignored, except in debug mode.
        tty.debug("Error in _spider: %s:%s" % (type(e), e),
                  traceback.format_exc())

    return pages, links
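
This variant differs from the earlier spider mainly in where the "verify SSL?" decision comes from (spack.insecure here, the config:verify_ssl setting above). A standalone sketch of the context selection, with a plain boolean standing in for either switch (an assumption for illustration):

import ssl
import sys

def make_context(want_verification=True):
    """Pick an SSL context the way the spiders above do."""
    if sys.version_info < (2, 7, 9) or (3,) < sys.version_info < (3, 4, 3):
        # urlopen() on these interpreters accepts no `context` argument and
        # never verifies certificates, so there is nothing to build.
        return None
    if want_verification:
        return ssl.create_default_context()
    return ssl._create_unverified_context()
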
Code example #49
0
File: web.py Project: RadeonOpenCompute/rocm-spack
def read_from_url(url, accept_content_type=None):
    url = url_util.parse(url)
    context = None

    verify_ssl = spack.config.get('config:verify_ssl')

    # Don't even bother with a context unless the URL scheme is one that uses
    # SSL certs.
    if uses_ssl(url):
        if verify_ssl:
            if __UNABLE_TO_VERIFY_SSL:
                # User wants SSL verification, but it cannot be provided.
                warn_no_ssl_cert_checking()
            else:
                # User wants SSL verification, and it *can* be provided.
                context = ssl.create_default_context()  # novm
        else:
            # User has explicitly indicated that they do not want SSL
            # verification.
            if not __UNABLE_TO_VERIFY_SSL:
                context = ssl._create_unverified_context()

    url_scheme = url.scheme
    url = url_util.format(url)
    if sys.platform == "win32" and url_scheme == "file":
        url = convert_to_posix_path(url)
    req = Request(url)

    content_type = None
    is_web_url = url_scheme in ('http', 'https')
    if accept_content_type and is_web_url:
        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        content_type = get_header(resp.headers, 'Content-type')

    # Do the real GET request when we know it's just HTML.
    req.get_method = lambda: "GET"

    try:
        response = _urlopen(req, timeout=_timeout, context=context)
    except URLError as err:
        raise SpackWebError('Download failed: {ERROR}'.format(ERROR=str(err)))

    if accept_content_type and not is_web_url:
        content_type = get_header(response.headers, 'Content-type')

    reject_content_type = (accept_content_type and
                           (content_type is None or
                            not content_type.startswith(accept_content_type)))

    if reject_content_type:
        tty.debug("ignoring page {0}{1}{2}".format(
            url, " with content type " if content_type is not None else "",
            content_type or ""))

        return None, None, None

    return response.geturl(), response.headers, response
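
A hypothetical call site for the function above, assuming the file/project header is accurate and the module is importable as spack.util.web (not verified here); the URL is a placeholder. When the content type is rejected the function returns a triple of None values.

from spack.util.web import SpackWebError, read_from_url

try:
    fetched_url, headers, response = read_from_url(
        "https://example.org/packages/index.html",   # placeholder URL
        accept_content_type="text/html")
    if response is None:
        print("skipped: server did not report text/html")
    else:
        print("fetched", fetched_url, "as", headers.get("Content-type"))
except SpackWebError as err:
    print("download failed:", err)
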
Code example #50
0
def send_file_to_genomespace( genomespace_site, username, token, source_filename, target_directory, target_filename, file_type, content_type, log_filename, gs_toolname ):
    target_filename = target_filename.replace( '/', '-' )  # Slashes no longer allowed in filenames
    url_opener = get_cookie_opener( username, token, gs_toolname=gs_toolname )
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    dm_url = genomespace_site_dict['dmServer']
    # get default directory
    if target_directory and target_directory[0] == '/':
        directory_dict, target_directory = get_directory( url_opener, dm_url, [ "%s/%s/%s" % ( GENOMESPACE_API_VERSION_STRING, 'file', target_directory[1] ) ] + target_directory[2:] )
        directory_dict = directory_dict['directory']
    else:
        directory_dict = get_personal_directory( url_opener, dm_url )['directory']  # this is the base for the auto-generated galaxy export directories
    # what directory to stuff this in
    target_directory_dict = create_directory( url_opener, directory_dict, target_directory, dm_url )
    content_length = os.path.getsize( source_filename )
    input_file = open( source_filename, 'rb' )
    if content_length > TARGET_SIMPLE_PUT_UPLOAD_SIZE:
        # Determine sizes of each part.
        split_count = content_length // TARGET_SPLIT_SIZE  # integer division; a float count would break the list multiply below
        last_size = content_length - ( split_count * TARGET_SPLIT_SIZE )
        sizes = [ TARGET_SPLIT_SIZE ] * split_count
        if last_size:
            if last_size < MIN_MULTIPART_UPLOAD_SIZE:
                if sizes:
                    sizes[-1] = sizes[-1] + last_size
                else:
                    sizes = [ last_size ]
            else:
                sizes.append( last_size )
        print("Performing multi-part upload in %i parts." % ( len( sizes ) ))
        # get upload url
        upload_url = "uploadinfo"
        upload_url = "%s/%s/%s%s/%s" % ( dm_url, GENOMESPACE_API_VERSION_STRING, upload_url, target_directory_dict['path'], quote( target_filename, safe='' ) )
        upload_request = Request( upload_url, headers={ 'Content-Type': 'application/json', 'Accept': 'application/json' } )
        upload_request.get_method = lambda: 'GET'
        upload_info = json.loads( url_opener.open( upload_request ).read() )
        conn = S3Connection( aws_access_key_id=upload_info['amazonCredentials']['accessKey'],
                             aws_secret_access_key=upload_info['amazonCredentials']['secretKey'],
                             security_token=upload_info['amazonCredentials']['sessionToken'] )
        # Cannot use conn.get_bucket due to permissions, manually create bucket object
        bucket = boto.s3.bucket.Bucket( connection=conn, name=upload_info['s3BucketName'] )
        mp = bucket.initiate_multipart_upload( upload_info['s3ObjectKey'] )
        for i, part_size in enumerate( sizes, start=1 ):
            fh = tempfile.TemporaryFile( 'wb+' )
            while part_size:
                if CHUNK_SIZE > part_size:
                    read_size = part_size
                else:
                    read_size = CHUNK_SIZE
                chunk = input_file.read( read_size )
                fh.write( chunk )
                part_size = part_size - read_size
            fh.flush()
            fh.seek(0)
            mp.upload_part_from_file( fh, i )
            fh.close()
        upload_result = mp.complete_upload()
    else:
        print('Performing simple put upload.')
        upload_url = "uploadurl"
        content_md5 = hashlib.md5()
        chunk_write( input_file, content_md5, target_method="update" )
        input_file.seek( 0 )  # back to start, for uploading

        upload_params = { 'Content-Length': content_length, 'Content-MD5': base64.standard_b64encode( content_md5.digest() ), 'Content-Type': content_type }
        upload_url = "%s/%s/%s%s/%s?%s" % ( dm_url, GENOMESPACE_API_VERSION_STRING, upload_url, target_directory_dict['path'], quote( target_filename, safe='' ), urlencode( upload_params ) )
        # Note: headers={ 'Content-Type': 'application/json', 'Accept': 'application/text' } are deliberately omitted here.
        # Although http://www.genomespace.org/team/specs/updated-dm-rest-api states that "Every HTTP request to the Data
        # Manager should include the Accept header with a preference for the media types application/json and
        # application/text.", that does not appear to be correct for this request.
        new_file_request = Request( upload_url )
        new_file_request.get_method = lambda: 'GET'
        # get url to upload to
        target_upload_url = url_opener.open( new_file_request ).read()
        # upload file to determined url
        upload_headers = dict( upload_params )
        # upload_headers[ 'x-amz-meta-md5-hash' ] = content_md5.hexdigest()
        upload_headers[ 'Accept' ] = 'application/json'
        upload_file_request = Request( target_upload_url, headers=upload_headers, data=input_file )
        upload_file_request.get_method = lambda: 'PUT'
        upload_result = urlopen( upload_file_request ).read()
    result_url = "%s/%s" % ( target_directory_dict['url'], quote( target_filename, safe='' ) )
    # determine available gs launch apps
    web_tools = get_genome_space_launch_apps( genomespace_site_dict['atmServer'], url_opener, result_url, file_type )
    if log_filename:
        log_file = open( log_filename, 'wb' )
        log_file.write( "<html><head><title>File uploaded to GenomeSpace from Galaxy</title></head><body>\n" )
        log_file.write( '<p>Uploaded <a href="%s">%s/%s</a> to GenomeSpace.</p>\n' % ( result_url, target_directory_dict['path'], target_filename ) )
        if web_tools:
            log_file.write( "<p>You may open this file directly in the following applications:</p>\n" )
            log_file.write( '<p><ul>\n' )
            for web_tool in web_tools:
                log_file.write( '<li><a href="%s">%s</a></li>\n' % ( web_tool ) )
            log_file.write( '</ul></p>\n' )
        else:
            log_file.write( '<p>There are no GenomeSpace applications available for file type: %s</p>\n' % ( file_type ) )
        log_file.write( "</body></html>\n" )
    return upload_result
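
The multipart branch above sizes the upload parts so that a short remainder never falls below the S3 minimum part size. A standalone sketch of that arithmetic follows; the constants are illustrative stand-ins, not the values the Galaxy tool actually uses.

TARGET_SPLIT_SIZE = 250 * 1024 * 1024        # assumed example value
MIN_MULTIPART_UPLOAD_SIZE = 5 * 1024 * 1024  # S3's documented minimum part size

def plan_part_sizes(content_length):
    split_count = content_length // TARGET_SPLIT_SIZE
    last_size = content_length - split_count * TARGET_SPLIT_SIZE
    sizes = [TARGET_SPLIT_SIZE] * split_count
    if last_size:
        if last_size < MIN_MULTIPART_UPLOAD_SIZE and sizes:
            sizes[-1] += last_size   # fold a too-small tail into the last full part
        else:
            sizes.append(last_size)
    return sizes

assert sum(plan_part_sizes(600 * 1024 * 1024)) == 600 * 1024 * 1024
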
Code example #51
0
def download_from_genomespace_importer( username, token, json_parameter_file, genomespace_site, gs_toolname ):
    json_params = json.loads( open( json_parameter_file, 'r' ).read() )
    datasource_params = json_params.get( 'param_dict' )
    assert None not in [ username, token ], "Missing GenomeSpace username or token."
    output_filename = datasource_params.get( "output_file1", None )
    dataset_id = base_dataset_id = json_params['output_data'][0]['dataset_id']
    hda_id = json_params['output_data'][0]['hda_id']
    url_opener = get_cookie_opener( username, token, gs_toolname=gs_toolname )
    # load and set genomespace format ids to galaxy exts
    genomespace_site_dict = get_genomespace_site_urls()[ genomespace_site ]
    set_genomespace_format_identifiers( url_opener, genomespace_site_dict['dmServer'] )
    file_url_name = "URL"
    metadata_parameter_file = open( json_params['job_config']['TOOL_PROVIDED_JOB_METADATA_FILE'], 'wb' )
    # setup datatypes registry for sniffing
    datatypes_registry = Registry()
    datatypes_registry.load_datatypes( root_dir=json_params[ 'job_config' ][ 'GALAXY_ROOT_DIR' ], config=json_params[ 'job_config' ][ 'GALAXY_DATATYPES_CONF_FILE' ] )
    url_param = datasource_params.get( file_url_name, None )
    used_filenames = []
    for download_url in url_param.split( ',' ):
        using_temp_file = False
        parsed_url = urlparse( download_url )
        query_params = parse_qs( parsed_url[4] )
        # write file to disk
        new_file_request = Request( download_url )
        new_file_request.get_method = lambda: 'GET'
        target_download_url = url_opener.open( new_file_request )
        filename = None
        if 'Content-Disposition' in target_download_url.info():
            content_disposition = dict( x.strip().split('=') if '=' in x else ( x.strip(), '' ) for x in target_download_url.info()['Content-Disposition'].split( ';' ) )
            if 'filename' in content_disposition:
                filename = content_disposition[ 'filename' ].strip( "\"'" )
        if not filename:
            parsed_url = urlparse( download_url )
            query_params = parse_qs( parsed_url[4] )
            filename = unquote_plus( parsed_url[2].split( '/' )[-1] )
        if not filename:
            filename = download_url
        if output_filename is None:
            # need to use a temp file here, because we do not know the ext yet
            using_temp_file = True
            output_filename = tempfile.NamedTemporaryFile( prefix='tmp-genomespace-importer-' ).name
        output_file = open( output_filename, 'wb' )
        chunk_write( target_download_url, output_file )
        output_file.close()

        # determine file format
        file_type = None
        if 'dataformat' in query_params:  # this is a converted dataset
            file_type = query_params[ 'dataformat' ][0]
            file_type = get_galaxy_ext_from_genomespace_format_url( url_opener, file_type )
        else:
            try:
                # get and use GSMetadata object
                download_file_path = download_url.split( "%s/file/" % ( genomespace_site_dict['dmServer'] ), 1)[-1]  # FIXME: This is a very bad way to get the path for determining metadata. There needs to be a way to query the API using the download URL to get to the metadata object.
                metadata_request = Request( "%s/%s/filemetadata/%s" % ( genomespace_site_dict['dmServer'], GENOMESPACE_API_VERSION_STRING, download_file_path ) )
                metadata_request.get_method = lambda: 'GET'
                metadata_url = url_opener.open( metadata_request )
                file_metadata_dict = json.loads( metadata_url.read() )
                metadata_url.close()
                file_type = file_metadata_dict.get( 'dataFormat', None )
                if file_type and file_type.get( 'url' ):
                    file_type = file_type.get( 'url' )
                    file_type = get_galaxy_ext_from_genomespace_format_url( url_opener, file_type, default=None )
            except Exception:
                pass
        if file_type is None:
            # try to sniff datatype
            try:
                file_type = sniff.handle_uploaded_dataset_file( output_filename, datatypes_registry )
            except Exception:
                pass  # sniff failed
        if file_type is None and '.' in parsed_url[2]:
            # still no known datatype, fall back to using extension
            file_type = parsed_url[2].rsplit( '.', 1 )[-1]
            file_type = GENOMESPACE_EXT_TO_GALAXY_EXT.get( file_type, file_type )
        if file_type is None:
            # use default extension (e.g. 'data')
            file_type = DEFAULT_GALAXY_EXT

        # save json info for single primary dataset
        if dataset_id is not None:
            metadata_parameter_file.write( "%s\n" % json.dumps( dict( type='dataset',
                                                                      dataset_id=dataset_id,
                                                                      ext=file_type,
                                                                      name="GenomeSpace importer on %s" % ( filename ) ) ) )
        # if using tmp file, move the file to the new file path dir to get scooped up later
        if using_temp_file:
            original_filename = filename
            filename = ''.join( c in FILENAME_VALID_CHARS and c or '-' for c in filename )
            while filename in used_filenames:
                filename = "-%s" % filename
            used_filenames.append( filename )
            target_output_filename = os.path.join( os.getcwd(), 'primary_%i_%s_visible_%s' % ( hda_id, filename, file_type ) )
            shutil.move( output_filename, target_output_filename )
            metadata_parameter_file.write( "%s\n" % json.dumps( dict( type='new_primary_dataset',
                                                                      base_dataset_id=base_dataset_id,
                                                                      ext=file_type,
                                                                      filename=target_output_filename,
                                                                      name="GenomeSpace importer on %s" % ( original_filename ) ) ) )
        dataset_id = None  # only one primary dataset available
        output_filename = None  # only have one filename available
    metadata_parameter_file.close()
    return True
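
As in code example #46, the temp-file branch above has to invent a safe, unique filename before moving the download into place. A small sketch of that clean-up; the FILENAME_VALID_CHARS value here is an assumption, as the real tool defines its own set.

import string

FILENAME_VALID_CHARS = '-_.() %s%s' % (string.ascii_letters, string.digits)  # assumed

def unique_safe_filename(filename, used_filenames):
    # Replace disallowed characters, then prefix with '-' until the name is unused.
    safe = ''.join(c if c in FILENAME_VALID_CHARS else '-' for c in filename)
    while safe in used_filenames:
        safe = '-%s' % safe
    used_filenames.append(safe)
    return safe

# unique_safe_filename('my data?.vcf', [])  ->  'my data-.vcf'
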