Example #1
0
 def from_data(self, image_data, filetype=None, fileext='png'):
     """Build the multipart POST request that uploads raw image data.

     :param image_data: content submitted in the ``image`` form field.
     :param filetype: optional explicit content type; when given, the upload
         is named ``image.bin`` and carries this type.
     :param fileext: extension used for the upload name when ``filetype``
         is not given.
     :return: a POST ``Request`` aimed at ``self.search_image_url``.
     """
     if not filetype:
         # two-element tuple: content-type guessed from file extension
         upload = ('image.%s' % fileext, image_data)
     else:
         upload = ('image.bin', image_data, filetype)

     # Field order is significant, hence the ordered mapping.
     form = OrderedDict()
     form['MAX_FILE_SIZE'] = '10485760'
     form['image'] = upload
     form['url'] = ''
     form['search'] = 'search'
     form['nsfwfilter'] = 'off'
     form['subreddit[pics]'] = 'off'
     form['subreddit[funny]'] = 'off'
     form['subreddit[wtf]'] = 'off'
     form['subreddit[nsfw]'] = 'off'
     form['subreddit[others]'] = 'off'
     form['subreddit[all]'] = 'off'

     payload, mime = encode_multipart_formdata(form, boundary=None)
     request_headers = {
         b'Content-Type': mime,
         b'X-Requested-With': b'XMLHttpRequest',
         b'DNT': b'1',
     }
     return Request(self.search_image_url, method='POST',
                    body=payload, headers=request_headers)
Example #2
0
    def test_field_encoding(self):
        """Text, bytes and mixed key/value types are expected to encode identically."""
        variants = (
            [('k', 'v'), ('k2', 'v2')],
            [('k', b'v'), (u('k2'), b'v2')],
            [('k', b'v'), (u('k2'), 'v2')],
        )

        for fields in variants:
            encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY)

            delimiter = b'--' + b(BOUNDARY)
            expected_body = (
                delimiter + b'\r\n'
                b'Content-Disposition: form-data; name="k"\r\n'
                b'Content-Type: text/plain\r\n'
                b'\r\n'
                b'v\r\n'
                + delimiter + b'\r\n'
                b'Content-Disposition: form-data; name="k2"\r\n'
                b'Content-Type: text/plain\r\n'
                b'\r\n'
                b'v2\r\n'
                + delimiter + b'--\r\n'
            )
            # The offending field set is passed as the failure message.
            self.assertEqual(encoded, expected_body, fields)

            expected_type = b'multipart/form-data; boundary=' + b(BOUNDARY)
            self.assertEqual(content_type, expected_type)
Example #3
0
    def from_url(self, image_url):
        """Build the multipart POST request that searches by image URL.

        :param image_url: URL of the image; resolved through
            ``find_media_url`` unless it ends with ``.gifv``.
        :return: a POST ``Request`` aimed at ``self.search_image_url``.
        """
        is_gifv = image_url.endswith('.gifv')
        if not is_gifv:
            # do not 'optimize' .gifv links for KD
            image_url = find_media_url(image_url, self.settings)

        # An earlier variant issued a GET FormRequest against self.search_url
        # with the fields kdtoolver='b1' and q=image_url; POST is used
        # instead because it is more in line with what a browser does.
        form = OrderedDict()
        form['MAX_FILE_SIZE'] = '10485760'
        form['image'] = ''
        form['url'] = image_url
        form['search'] = 'search'
        form['nsfwfilter'] = 'off'
        form['subreddit[pics]'] = 'off'
        form['subreddit[funny]'] = 'off'
        form['subreddit[wtf]'] = 'off'
        form['subreddit[nsfw]'] = 'off'
        form['subreddit[others]'] = 'off'
        form['subreddit[all]'] = 'off'

        payload, mime = encode_multipart_formdata(form, boundary=None)
        request_headers = {b'Content-Type': mime, b'DNT': b'1'}
        return Request(self.search_image_url, method='POST',
                       body=payload, headers=request_headers)
Example #4
0
 def from_data(self, image_data, filetype=None, fileext='png'):
     """Build the POST request that submits image data as a base64 form field.

     bing transcodes images with javascript, then submits base64-encoded
     jpeg data, so the input is passed through ``convert_image`` and
     base64-encoded before it is placed in the ``imageBin`` field.

     :param image_data: raw image content.
     :param filetype: accepted for signature compatibility; not used here.
     :param fileext: extension used for the faked file name and size label.
     :return: a POST ``Request`` whose callback is ``self.parse_image``.
     """
     # The size label is derived from the data as received, before conversion.
     size_kb = len(image_data) / 1024
     converted, (width, height) = convert_image(image_data)
     # base64 encoded submission, sent without filename or content type
     image_field = (None, base64.b64encode(converted), None)

     form_multipart = OrderedDict()
     form_multipart['imgurl'] = ''
     form_multipart['cbir'] = 'sbi'
     form_multipart['imageBin'] = image_field

     # probably nobody cares about that, but fake it anyway
     size_label = u'%s x %s · %s kB · %s'.encode('utf-8') \
             % (width, height, size_kb, fileext.encode('utf-8'))

     query = OrderedDict()
     query['q'] = ''
     query['view'] = 'detailv2'
     query['iss'] = 'sbi'
     query['FORM'] = 'IRSBIQ'
     query['sbifsz'] = size_label
     query['sbifnm'] = 'image.%s' % fileext  # our "filename"
     query['thw'] = width
     query['thh'] = height
     # disable safe search (TODO: only for nfsw sub searches?)
     # an ('adlt', 'off') entry used to do this, but doesn't work anymore...

     target = self.search_image_url + '?' + urlencode(query)
     payload, mime = encode_multipart_formdata(form_multipart, boundary=None)
     request_headers = {
         b'Accept-Language': b'en-US,en;q=0.5',
         b'Content-Type': mime,
         b'DNT': b'1',
     }
     return Request(target, method='POST', cookies=self.cookies,
                    body=payload, headers=request_headers,
                    callback=self.parse_image)
Example #5
0
    def test_filename(self):
        """A (filename, data) value without a known extension is expected to be octet-stream."""
        encoded, content_type = encode_multipart_formdata(
            [('k', ('somename', b'v'))], boundary=BOUNDARY)

        delimiter = b'--' + b(BOUNDARY)
        expected_body = b''.join([
            delimiter, b'\r\n',
            b'Content-Disposition: form-data; name="k"; filename="somename"\r\n',
            b'Content-Type: application/octet-stream\r\n',
            b'\r\n',
            b'v\r\n',
            delimiter, b'--\r\n',
        ])
        self.assertEqual(encoded, expected_body)

        expected_type = b'multipart/form-data; boundary=' + b(BOUNDARY)
        self.assertEqual(content_type, expected_type)


	def test_textplain(self):
	    """A ``.txt`` filename is expected to yield a text/plain part."""
	    encoded, content_type = encode_multipart_formdata(
	        [('k', ('somefile.txt', b'v'))], boundary=BOUNDARY)

	    delimiter = b'--' + b(BOUNDARY)
	    expected_body = b''.join([
	        delimiter, b'\r\n',
	        b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n',
	        b'Content-Type: text/plain\r\n',
	        b'\r\n',
	        b'v\r\n',
	        delimiter, b'--\r\n',
	    ])
	    self.assertEqual(encoded, expected_body)

	    expected_type = b'multipart/form-data; boundary=' + b(BOUNDARY)
	    self.assertEqual(content_type, expected_type)
Example #6
0
    def post(self, path, data=None, multipart=False, **kwargs):
        """
        Convenience wrapper for the ``http_client``.

        :param path:
            Passed through to ``self.fetch`` as its first argument.
        :param data:
            Form fields to send. ``None`` (the default) is treated as an
            empty mapping.
        :param multipart:
            If True the given ``data`` is encoded "multipart/form-data" through
            ``urllib3``
            If the value is a tuple of two elements, then the first element is
            treated as the filename of the form-data section.
        :param kwargs:
            Extra keyword arguments forwarded to ``self.fetch``. A
            ``headers`` entry is merged with the headers computed here; the
            computed ``Content-Type`` wins on conflict.
        :return:
            Whatever ``self.fetch`` returns.
        """
        if data is None:
            data = {}

        headers = {}
        if multipart:
            body, content_type = encode_multipart_formdata(data)
            headers["Content-Type"] = content_type
        else:
            body = urllib.urlencode(data, doseq=True)

        # Take 'headers' out of kwargs before forwarding: leaving it in would
        # hand ``fetch`` the ``headers`` keyword twice and raise TypeError.
        caller_headers = kwargs.pop('headers', None)
        if caller_headers is not None:
            # Update in place so the caller's header container type is kept.
            caller_headers.update(headers)
            headers = caller_headers

        return self.fetch(
            path,
            method="POST",
            body=body,
            headers=headers,
            **kwargs
        )
Example #7
0
    def _encode_files(files, data):
        """Encode form data plus file uploads as a multipart/form-data body.

        ``files`` and ``data`` may each be a dict or a list of 2-tuples.
        Ordering is kept for lists of 2-tuples and is arbitrary for dicts.

        Returns the ``(body, content_type)`` pair produced by
        ``encode_multipart_formdata``. Raises ``ValueError`` when ``files``
        is empty or ``data`` is a string.
        """
        if not files:
            raise ValueError("Files must be provided.")
        if isinstance(data, basestring):
            raise ValueError("Data must not be a string.")

        parts = []
        data_items = to_key_val_list(data or {})
        file_items = to_key_val_list(files or {})

        for name, values in data_items:
            # Treat a scalar (or a string) as a one-element list of values.
            if isinstance(values, basestring) or not hasattr(values, '__iter__'):
                values = [values]
            for item in values:
                if item is None:
                    continue
                # Don't call str() on bytestrings: in Py3 it all goes wrong.
                if not isinstance(item, bytes):
                    item = str(item)

                key = name.decode('utf-8') if isinstance(name, bytes) else name
                payload = item.encode('utf-8') if isinstance(item, str) else item
                parts.append((key, payload))

        for key, spec in file_items:
            # A tuple/list spec may carry an explicit filename, content type
            # and extra headers, in that order.
            mime = None
            extra_headers = None
            if not isinstance(spec, (tuple, list)):
                filename = guess_filename(spec) or key
                source = spec
            elif len(spec) == 2:
                filename, source = spec
            elif len(spec) == 3:
                filename, source, mime = spec
            else:
                filename, source, mime, extra_headers = spec

            if isinstance(source, (str, bytes, bytearray)):
                contents = source
            else:
                contents = source.read()

            part = RequestField(name=key, data=contents,
                                filename=filename, headers=extra_headers)
            part.make_multipart(content_type=mime)
            parts.append(part)

        body, content_type = encode_multipart_formdata(parts)
        return body, content_type
Example #8
0
    def test_input_datastructures(self):
        """A dict and a list of pairs are both expected to yield three boundary markers."""
        as_dict = dict(k='v', k2='v2')
        as_pairs = [('k', 'v'), ('k2', 'v2')]

        for fields in (as_dict, as_pairs):
            encoded, _ = encode_multipart_formdata(fields, boundary=BOUNDARY)
            boundary_hits = encoded.count(b(BOUNDARY))
            self.assertEqual(boundary_hits, 3)
Example #9
0
 def test_control_style(self):
     """Check the body produced with ``field_encoding_style='RFC2231'``.

     The non-ASCII field name is expected as a percent-encoded
     ``name*=utf-8''...`` parameter and the value as raw UTF-8 bytes.
     """
     fields = [(u('n\u00e4me\u011b'), u('va\u0142u\u00ea'))]
     encoded, content_type = encode_multipart_formdata(
         fields, boundary=BOUNDARY, field_encoding_style='RFC2231')
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(encoded,
         b'--' + b(BOUNDARY) + b'\r\n'
         b"Content-Disposition: form-data; name*=utf-8''n%C3%A4me%C4%9B\r\n"
         b'\r\n'
         b'va\xc5\x82u\xc3\xaa\r\n'
         b'--' + b(BOUNDARY) + b'--\r\n'
         )
Example #10
0
 def test_control_encoding(self):
     fields = [(u('n\u00e4me\u011b'), u('va\u0142u\u00ea'))]
     encoded, content_type = encode_multipart_formdata(
         fields, boundary=BOUNDARY, form_data_encoding = 'iso-8859-1')
     self.assertEquals(encoded,
         b'--' + b(BOUNDARY) + b'\r\n'
         b'Content-Disposition: form-data; name="n\xe4meě"\r\n'
         b'\r\n'
         b'vału\xea\r\n'
         b'--' + b(BOUNDARY) + b'--\r\n'
         )
Example #11
0
    def test_request_fields(self):
        """A pre-built RequestField is expected to be emitted with only its own headers."""
        field = RequestField('k', b'v', filename='somefile.txt',
                             headers={'Content-Type': 'image/jpeg'})

        encoded, content_type = encode_multipart_formdata([field], boundary=BOUNDARY)

        delimiter = b'--' + b(BOUNDARY)
        expected_body = b''.join([
            delimiter, b'\r\n',
            b'Content-Type: image/jpeg\r\n',
            b'\r\n',
            b'v\r\n',
            delimiter, b'--\r\n',
        ])
        self.assertEqual(encoded, expected_body)
Example #12
0
    def test_request_fields(self):
        """A pre-built RequestField is expected to be emitted with only its own headers."""
        field = RequestField(
            'k',
            b'v',
            filename='somefile.txt',
            headers={'Content-Type': 'image/jpeg'},
        )

        encoded, content_type = encode_multipart_formdata([field], boundary=BOUNDARY)

        delimiter = b'--' + b(BOUNDARY)
        expected = b''.join([
            delimiter, b'\r\n',
            b'Content-Type: image/jpeg\r\n',
            b'\r\n',
            b'v\r\n',
            delimiter, b'--\r\n',
        ])

        assert encoded == expected
Example #13
0
    def test_explicit(self):
        """An explicit content type in the value tuple is expected to be used as given."""
        upload = ('somefile.txt', b'v', 'image/jpeg')

        encoded, content_type = encode_multipart_formdata([('k', upload)], boundary=BOUNDARY)

        delimiter = b'--' + b(BOUNDARY)
        expected = b''.join([
            delimiter, b'\r\n',
            b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n',
            b'Content-Type: image/jpeg\r\n',
            b'\r\n',
            b'v\r\n',
            delimiter, b'--\r\n',
        ])
        assert encoded == expected

        expected_type = 'multipart/form-data; boundary=' + str(BOUNDARY)
        assert content_type == expected_type
def _query(
    vuforia_database: VuforiaDatabase,
    high_quality_image: io.BytesIO,
) -> Endpoint:
    """
    Build the ``Endpoint`` describing an image recognition query request.

    The image is sent as a multipart ``image`` part named ``image.jpeg``
    and the request is signed with the database's client keys.
    """
    image_bytes = high_quality_image.read()
    date = rfc_1123_date()
    request_path = '/v1/query'

    body, content_type_header = encode_multipart_formdata(
        {'image': ('image.jpeg', image_bytes, 'image/jpeg')},
    )

    access_key = vuforia_database.client_access_key
    secret_key = vuforia_database.client_secret_key

    signature = authorization_header(
        access_key=access_key,
        secret_key=secret_key,
        method=POST,
        content=body,
        # Note that this is not the actual Content-Type header value sent.
        content_type='multipart/form-data',
        date=date,
        request_path=request_path,
    )

    prepared_request = requests.Request(
        method=POST,
        url=urljoin(base=VWQ_HOST, url=request_path),
        headers={
            'Authorization': signature,
            'Date': date,
            # The real header carries the boundary chosen by the encoder.
            'Content-Type': content_type_header,
        },
        data=body,
    ).prepare()

    return Endpoint(
        successful_headers_status_code=HTTPStatus.OK,
        successful_headers_result_code=ResultCodes.SUCCESS,
        prepared_request=prepared_request,
        access_key=access_key,
        secret_key=secret_key,
    )
Example #15
0
def multipart(name, data, content_type='image/jpeg'):
    """Encode ``data`` as a single-file multipart form.

    The file field's name and its filename are both ``name``.

    returns tuple of (encoded body, content type)
    """
    file_part = (name, data, content_type)
    return filepost.encode_multipart_formdata({name: file_part})
Example #16
0
    def test_filename(self):
        """A (filename, data) value without a known extension is expected to be octet-stream."""
        upload = ('somename', b'v')

        encoded, content_type = encode_multipart_formdata([('k', upload)], boundary=BOUNDARY)

        delimiter = b'--' + b(BOUNDARY)
        expected = b''.join([
            delimiter, b'\r\n',
            b'Content-Disposition: form-data; name="k"; filename="somename"\r\n',
            b'Content-Type: application/octet-stream\r\n',
            b'\r\n',
            b'v\r\n',
            delimiter, b'--\r\n',
        ])
        assert encoded == expected

        expected_type = 'multipart/form-data; boundary=' + str(BOUNDARY)
        assert content_type == expected_type
Example #17
0
    def test_explicit(self):
        """An explicit content type in the value tuple is expected to be used as given."""
        upload = ('somefile.txt', b'v', 'image/jpeg')

        encoded, content_type = encode_multipart_formdata([('k', upload)], boundary=BOUNDARY)

        opening = b'--' + b(BOUNDARY) + b'\r\n'
        closing = b'--' + b(BOUNDARY) + b'--\r\n'
        part = (b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n'
                b'Content-Type: image/jpeg\r\n'
                b'\r\n'
                b'v\r\n')
        assert encoded == opening + part + closing

        expected_type = 'multipart/form-data; boundary=' + str(BOUNDARY)
        assert content_type == expected_type
Example #18
0
def test_anno_image_input_http_request_too_many_files(input_adapter, img_file,
                                                      json_file):
    """Post two images plus annotations and check the first image and the JSON are kept."""
    uploads = {
        "image": ("test.jpg", read_bin(img_file)),
        "image2": ("test.jpg", read_bin(img_file)),
        "annotations": ("test.json", read_bin(json_file)),
    }
    body, content_type = encode_multipart_formdata(uploads)
    request = HTTPRequest(headers=(("Content-Type", content_type), ), body=body)

    task = input_adapter.from_http_request(request)

    first, second = task.data[0], task.data[1]
    assert first.read() == read_bin(img_file)
    assert second.read() == read_bin(json_file)
Example #19
0
    def test_filename(self):
        """A (filename, data) value without a known extension is expected to be octet-stream."""
        encoded, content_type = encode_multipart_formdata(
            [('k', ('somename', b'v'))], boundary=BOUNDARY)

        opening = b'--' + b(BOUNDARY) + b'\r\n'
        closing = b'--' + b(BOUNDARY) + b'--\r\n'
        part = (b'Content-Disposition: form-data; name="k"; filename="somename"\r\n'
                b'Content-Type: application/octet-stream\r\n'
                b'\r\n'
                b'v\r\n')
        assert encoded == opening + part + closing

        assert content_type == 'multipart/form-data; boundary=' + str(BOUNDARY)
def test_file_input_aws_lambda_event(input_adapter, bin_file):
    """Feed a two-file multipart body through ``from_aws_lambda_event``.

    Both uploaded parts carry the content of ``bin_file``; the resulting
    task is expected to expose the bytes ``b'\\x810\\x899'`` for each.
    """
    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(str(bin_file), 'rb') as source:
        file_bytes = source.read()

    body, content_type = encode_multipart_formdata(
        dict(
            x=("test.bin", file_bytes),
            y=("test.bin", file_bytes),
        ))
    headers = {"Content-Type": content_type}
    aws_lambda_event = {"headers": headers, "body": body}

    task = input_adapter.from_aws_lambda_event(aws_lambda_event)
    assert b'\x810\x899' == task.data[0].read()
    assert b'\x810\x899' == task.data[1].read()
Example #21
0
    def _encode_files(files, data):
        """Encode form data plus file uploads as a multipart/form-data body.

        ``files`` and ``data`` may each be a dict or a list of 2-tuples.
        Ordering is kept for lists of 2-tuples and is arbitrary for dicts.

        Returns ``None`` when there are no files or ``data`` is a string,
        otherwise the ``(body, content_type)`` pair produced by
        ``encode_multipart_formdata``.
        """
        if (not files) or isinstance(data, str):
            return None

        parts = []
        data_items = to_key_val_list(data or {})
        file_items = to_key_val_list(files or {})

        for name, values in data_items:
            # Treat a scalar (or a string) as a one-element list of values.
            if isinstance(values, basestring) or not hasattr(values, '__iter__'):
                values = [values]
            for item in values:
                if item is None:
                    continue
                key = name.decode('utf-8') if isinstance(name, bytes) else name
                payload = item.encode('utf-8') if isinstance(item, str) else item
                parts.append((key, payload))

        for key, spec in file_items:
            # A tuple/list spec may carry an explicit filename and content type.
            mime = None
            if not isinstance(spec, (tuple, list)):
                filename = guess_filename(spec) or key
                source = spec
            elif len(spec) == 2:
                filename, source = spec
            else:
                filename, source, mime = spec

            # Wrap in-memory content so it can be read like a file object.
            if isinstance(source, str):
                source = StringIO(source)
            elif isinstance(source, bytes):
                source = BytesIO(source)

            contents = source.read()
            entry = (filename, contents, mime) if mime else (filename, contents)
            parts.append((key, entry))

        body, content_type = encode_multipart_formdata(parts)
        return body, content_type
Example #22
0
    def test_field_encoding(self, fields):
        """Each supplied field set is expected to encode to the same two plain parts."""
        encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY)

        delimiter = b'--' + b(BOUNDARY)
        expected = b''.join([
            delimiter, b'\r\n',
            b'Content-Disposition: form-data; name="k"\r\n',
            b'\r\n',
            b'v\r\n',
            delimiter, b'\r\n',
            b'Content-Disposition: form-data; name="k2"\r\n',
            b'\r\n',
            b'v2\r\n',
            delimiter, b'--\r\n',
        ])
        assert encoded == expected

        expected_type = 'multipart/form-data; boundary=' + str(BOUNDARY)
        assert content_type == expected_type
Example #23
0
def generate_multipart_body(image_file, json_file=None):
    """Build a multipart body holding an image and, optionally, annotations.

    :param image_file: path of the image, uploaded as ``image.jpg``.
    :param json_file: optional path of a JSON file, uploaded as
        ``annotations.json``.
    :return: ``(body, headers)`` where ``headers`` holds the
        ``Content-Type`` and ``Content-Length`` for ``body``.
    """
    # Context managers close the files promptly; the previous
    # ``open(...).read()`` left the handles for the garbage collector.
    with open(image_file, "rb") as image_handle:
        image_bytes = image_handle.read()
    files = {"image.jpg": ("image.jpg", image_bytes)}

    if json_file:
        with open(json_file, "rb") as json_handle:
            annotation_bytes = json_handle.read()
        files["annotations.json"] = ("annotations.json", annotation_bytes)

    body, content_type = encode_multipart_formdata(files)

    headers = {
        'Content-Type': content_type,
        'Content-Length': len(body),
    }
    return body, headers
Example #24
0
    def test_filename(self):
        """A (filename, data) value without a known extension is expected to be octet-stream."""
        upload = ('somename', 'v')

        encoded, content_type = encode_multipart_formdata(
            [('k', upload)], boundary=BOUNDARY)

        expected_body = (
            b'-----boundary---\r\n'
            b'Content-Disposition: form-data; name="k"; filename="somename"\r\n'
            b'Content-Type: application/octet-stream\r\n'
            b'\r\n'
            b'v\r\n-----boundary-----\r\n'
        )
        self.assertEqual(encoded, expected_body)

        expected_type = b'multipart/form-data; boundary=---boundary---'
        self.assertEqual(content_type, expected_type)
Example #25
0
    def test_field_encoding(self, fields):
        """Each supplied field set is expected to encode to the same two plain parts."""
        encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY)

        opening = b'--' + b(BOUNDARY) + b'\r\n'
        closing = b'--' + b(BOUNDARY) + b'--\r\n'
        first_part = (b'Content-Disposition: form-data; name="k"\r\n'
                      b'\r\n'
                      b'v\r\n')
        second_part = (b'Content-Disposition: form-data; name="k2"\r\n'
                       b'\r\n'
                       b'v2\r\n')
        assert encoded == opening + first_part + opening + second_part + closing

        assert content_type == 'multipart/form-data; boundary=' + str(BOUNDARY)
Example #26
0
    def test_textplain(self):
        """A ``.txt`` filename is expected to yield a text/plain part."""
        upload = ('somefile.txt', b'v')

        encoded, content_type = encode_multipart_formdata([('k', upload)], boundary=BOUNDARY)

        delimiter = b'--' + b(BOUNDARY)
        expected = b''.join([
            delimiter, b'\r\n',
            b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n',
            b'Content-Type: text/plain\r\n',
            b'\r\n',
            b'v\r\n',
            delimiter, b'--\r\n',
        ])
        self.assertEqual(encoded, expected)

        expected_type = 'multipart/form-data; boundary=' + str(BOUNDARY)
        self.assertEqual(content_type, expected_type)
Example #27
0
def test_image_input_http_request_multipart_form(make_api, img_file):
    """Send an image as multipart form data through a mocked flask request."""
    api = make_api(LegacyImageInput(input_names=("image",)), predict)

    with open(img_file, "rb") as image_handle:
        img_bytes = image_handle.read()

    uploads = {"image": ("test.jpg", img_bytes)}
    body, content_type = encode_multipart_formdata(uploads)

    # Stand-in for a flask request carrying the encoded body.
    request = mock.MagicMock(spec=flask.Request)
    request.method = "POST"
    request.headers = {"Content-Type": content_type}
    request.get_data.return_value = body

    response = api.handle_request(request)

    assert response.status_code == 200
    assert "[10, 10, 3]" in str(response.response)
Example #28
0
 def from_data(self, image_data, filetype=None, fileext='png'):
     """Build the multipart POST request that uploads raw image data.

     :param image_data: content submitted in the ``image`` form field.
     :param filetype: optional explicit content type; when given, the upload
         is named ``image.bin`` and carries this type.
     :param fileext: extension used for the upload name when ``filetype``
         is not given.
     :return: a POST ``Request`` aimed at ``self.search_image_url``.
     """
     if not filetype:
         # two-element tuple: content-type guessed from file extension
         upload = ('image.%s' % fileext, image_data)
     else:
         upload = ('image.bin', image_data, filetype)

     form = OrderedDict()
     form['image'] = upload

     payload, mime = encode_multipart_formdata(form, boundary=None)
     request_headers = {
         b'Accept-Language': b'en-US,en;q=0.5',
         b'Content-Type': mime,
         b'DNT': b'1',
     }
     return Request(self.search_image_url, method='POST',
                    body=payload, headers=request_headers)
Example #29
0
    def test_filename(self) -> None:
        """A (filename, data) value without a known extension is expected to be octet-stream."""
        upload = ("somename", b"v")

        encoded, content_type = encode_multipart_formdata(
            [("k", upload)], boundary=BOUNDARY)

        delimiter = b"--" + BOUNDARY_BYTES
        expected = b"".join([
            delimiter, b"\r\n",
            b'Content-Disposition: form-data; name="k"; filename="somename"\r\n',
            b"Content-Type: application/octet-stream\r\n",
            b"\r\n",
            b"v\r\n",
            delimiter, b"--\r\n",
        ])
        assert encoded == expected

        expected_type = "multipart/form-data; boundary=" + str(BOUNDARY)
        assert content_type == expected_type
Example #30
0
def multipart(name, data, content_type='image/jpeg'):
    """Encode ``data`` as a single-file multipart form.

    The file field's name and its filename are both ``name``.

    returns tuple of (encoded body, content type)
    """
    fields = {}
    fields[name] = (name, data, content_type)
    return filepost.encode_multipart_formdata(fields)
Example #31
0
    def test_textplain(self):
        """A ``.txt`` filename is expected to yield a text/plain part."""
        upload = ("somefile.txt", b"v")

        encoded, content_type = encode_multipart_formdata(
            [("k", upload)], boundary=BOUNDARY)

        opening = b"--" + b(BOUNDARY) + b"\r\n"
        closing = b"--" + b(BOUNDARY) + b"--\r\n"
        part = (b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n'
                b"Content-Type: text/plain\r\n"
                b"\r\n"
                b"v\r\n")
        assert encoded == opening + part + closing

        assert content_type == "multipart/form-data; boundary=" + str(BOUNDARY)
Example #32
0
    def test_field_encoding(self, fields: _TYPE_FIELDS) -> None:
        """Each supplied field set is expected to encode to the same two plain parts."""
        encoded, content_type = encode_multipart_formdata(
            fields, boundary=BOUNDARY)

        delimiter = b"--" + BOUNDARY_BYTES
        expected = b"".join([
            delimiter, b"\r\n",
            b'Content-Disposition: form-data; name="k"\r\n',
            b"\r\n",
            b"v\r\n",
            delimiter, b"\r\n",
            b'Content-Disposition: form-data; name="k2"\r\n',
            b"\r\n",
            b"v2\r\n",
            delimiter, b"--\r\n",
        ])
        assert encoded == expected

        expected_type = "multipart/form-data; boundary=" + str(BOUNDARY)
        assert content_type == expected_type
Example #33
0
    def test_explicit(self) -> None:
        """An explicit content type in the value tuple is expected to be used as given."""
        upload = ("somefile.txt", b"v", "image/jpeg")

        encoded, content_type = encode_multipart_formdata(
            [("k", upload)], boundary=BOUNDARY)

        opening = b"--" + BOUNDARY_BYTES + b"\r\n"
        closing = b"--" + BOUNDARY_BYTES + b"--\r\n"
        part = (b'Content-Disposition: form-data; name="k"; filename="somefile.txt"\r\n'
                b"Content-Type: image/jpeg\r\n"
                b"\r\n"
                b"v\r\n")
        assert encoded == opening + part + closing

        assert content_type == "multipart/form-data; boundary=" + str(BOUNDARY)
 def __init__(self, check_only, chunksize=8192):
     """Prepare the multipart request body and headers.

     ``package``, ``hashtype``, ``hsh`` and ``filename`` are not defined in
     this method; presumably they come from an enclosing scope.

     :param check_only: when true, only the file name is sent as a plain
         field; otherwise the file's content is attached as the ``file`` part.
     :param chunksize: stored on the instance as ``self.chunksize``.
     """
     self.check_only = check_only
     self.chunksize = chunksize
     self.start = time.time()
     self.uploaded = False

     form_parts = [
         ('name', package),
         ('{}sum'.format(hashtype), hsh),
     ]
     if not check_only:
         with open(filename, 'rb') as source:
             payload = source.read()
         file_part = RequestField('file', payload, filename)
         file_part.make_multipart()
         form_parts.append(file_part)
     else:
         form_parts.append(('filename', filename))

     self.data, mime = encode_multipart_formdata(form_parts)
     self.headers = {'Content-Type': mime}
    def encode_body(self):
        """Encode ``self.form_fields`` and ``self.files`` as multipart/form-data.

        A falsy ``self.files`` is replaced by an empty dict on the instance.
        Returns the ``(body, content_type)`` pair produced by
        ``encode_multipart_formdata``.
        """
        parts = []
        for name, values in self.form_fields:
            # Treat a scalar (or a string) as a one-element list of values.
            if isinstance(values, basestring) or not hasattr(values, '__iter__'):
                values = [values]
            for item in values:
                if item is None:
                    continue
                # Don't call str() on bytestrings: in Py3 it all goes wrong.
                if not isinstance(item, bytes):
                    item = str(item)

                key = name.decode('utf-8') if isinstance(name, bytes) else name
                payload = item.encode('utf-8') if isinstance(item, str) else item
                parts.append((key, payload))

        if not self.files:
            self.files = {}

        for key, spec in self.files.items():
            # A tuple/list spec may carry an explicit filename, content type
            # and extra headers, in that order.
            mime = None
            extra_headers = None
            if not isinstance(spec, (tuple, list)):
                filename = guess_filename(spec) or key
                source = spec
            elif len(spec) == 2:
                filename, source = spec
            elif len(spec) == 3:
                filename, source, mime = spec
            else:
                filename, source, mime, extra_headers = spec

            if isinstance(source, (str, bytes, bytearray)):
                contents = source
            else:
                contents = source.read()

            part = RequestField(name=key, data=contents,
                                filename=filename, headers=extra_headers)
            part.make_multipart(content_type=mime)
            parts.append(part)

        body, content_type = encode_multipart_formdata(parts)
        return body, content_type
Example #36
0
    def test_request_fields(self) -> None:
        """A pre-built RequestField is expected to be emitted with only its own headers."""
        field = RequestField(
            "k",
            b"v",
            filename="somefile.txt",
            headers={"Content-Type": "image/jpeg"},
        )

        encoded, content_type = encode_multipart_formdata(
            [field], boundary=BOUNDARY)

        delimiter = b"--" + BOUNDARY_BYTES
        expected = b"".join([
            delimiter, b"\r\n",
            b"Content-Type: image/jpeg\r\n",
            b"\r\n",
            b"v\r\n",
            delimiter, b"--\r\n",
        ])

        assert encoded == expected
Example #37
0
    def from_url(self, image_url):
        """Build the POST request that searches by image URL.

        :param image_url: URL of the image; first resolved through
            ``find_media_url``.
        :return: a POST ``Request`` to ``self.search_image_url`` (with a
            query string appended) whose callback is ``self.parse_image``.
        """
        image_url = find_media_url(image_url, self.settings)

        # The legacy variant (a GET FormRequest to self.search_url with the
        # FORM/cbir/imgurl fields, sent after rewriting https URLs to http)
        # sat behind ``if False:`` and could never run, so it was removed.

        # this seems to be a newer version of bing, and seems to finds results
        # for more urls as well
        form_multipart = OrderedDict([
            ('imgurl', image_url),
            ('cbir', 'sbi'),
            ('imageBin', ''),
        ])
        form_urlencoded = OrderedDict([
            ('q', 'imgurl:%s' % image_url),
            ('view', 'detailv2'),
            ('iss', 'sbi'),
            ('FORM', 'IRSBIQ'),
            # disable safe search (TODO: only for nfsw sub searches?)
            #('adlt', 'off'), # doesn't work anymore...
        ])
        qstring = '?' + urlencode(form_urlencoded)
        body, content_type = encode_multipart_formdata(form_multipart, boundary=None)
        headers = {
            b'Accept-Language': b'en-US,en;q=0.5',
            b'Content-Type': content_type,
            b'DNT': b'1',
        }
        return Request(self.search_image_url + qstring, method='POST', cookies=self.cookies,
                       body=body, headers=headers, callback=self.parse_image)
def test_file_input_http_request_malformatted_input_missing_file(
        input_adapter, bin_file):
    """Check that three unsuitable HTTP requests all yield discarded tasks.

    The requests are: an empty multipart body, raw bytes sent with an
    ``images/jpeg`` content type, and a multipart body whose only part is
    named ``x``.
    """
    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(str(bin_file), 'rb') as source:
        file_bytes = source.read()
    requests = []

    body = b''
    headers = (("Content-Type", "multipart/form-data; boundary=123456"), )
    requests.append(HTTPRequest(headers=headers, body=body))

    body = file_bytes
    headers = (("Content-Type", "images/jpeg"), )
    requests.append(HTTPRequest(headers=headers, body=body))

    body, content_type = encode_multipart_formdata(
        dict(x=("test.bin", file_bytes), ))
    headers = (("Content-Type", content_type), )
    requests.append(HTTPRequest(headers=headers, body=body))

    for task in map(input_adapter.from_http_request, requests):
        assert task.is_discarded
Example #39
0
 def from_data(self, image_data, filetype=None, fileext='png'):
     """Build the multipart POST request that uploads raw image data.

     :param image_data: content submitted in the ``encoded_image`` field.
     :param filetype: optional explicit content type; when given, the upload
         is named ``image.bin`` and carries this type.
     :param fileext: extension used for the upload name when ``filetype``
         is not given.
     :return: a POST ``Request`` aimed at ``self.search_image_url``.
     """
     if not filetype:
         # two-element tuple: content-type guessed from file extension
         upload = ('image.%s' % fileext, image_data)
     else:
         upload = ('image.bin', image_data, filetype)

     form = OrderedDict()
     form['image_url'] = ''
     form['encoded_image'] = upload
     form['image_content'] = ''
     form['filename'] = ''
     form['hl'] = 'en'
     # disable safe search (TODO: only for nfsw sub searches?)
     form['safe'] = 'off'

     payload, mime = encode_multipart_formdata(form, boundary=None)
     request_headers = {
         b'Accept-Language': b'en-US,en;q=0.5',
         b'Content-Type': mime,
         b'DNT': b'1',
     }
     return Request(self.search_image_url, method='POST',
                    body=payload, headers=request_headers)
Example #40
0
def test_anno_image_input_batch_request_skip_bad(img_file, json_file):
    """Check that unusable requests in a batch are skipped, not fatal.

    The batch holds a request with no data, a well-formed multipart request
    and a multipart request whose parts are named ``image.invalid`` and
    ``annotations.invalid``. Only the well-formed request must produce a
    response; the other two slots must be ``None``.
    """
    adapter = AnnotatedImageInput(is_batch_input=True)

    multipart_data, headers = generate_multipart_body(img_file, json_file)

    empty_request = SimpleRequest(headers=headers, data=None)

    request = SimpleRequest.from_flask_request(
        Request.from_values(
            data=multipart_data,
            content_type=headers['Content-Type'],
            content_length=headers['Content-Length'],
        ))

    # Context managers close both handles deterministically; the local is
    # named ``annotations`` so it does not shadow the stdlib ``json`` module.
    with open(img_file, "rb") as image_fh:
        image = ("image.jpg", image_fh.read())
    with open(json_file, "rb") as json_fh:
        annotations = ("annotations.jso", json_fh.read())
    files = {"image.invalid": image, "annotations.invalid": annotations}
    bad_data, content_type = encode_multipart_formdata(files)

    bad_request = SimpleRequest.from_flask_request(
        Request.from_values(
            data=bad_data,
            content_type=content_type,
            content_length=len(bad_data),
        ))

    responses = adapter.handle_batch_request(
        [empty_request, request, bad_request], predict_image_and_json)

    assert len(responses) == 3
    assert responses[0] is None
    assert responses[1].status == 200 and responses[
        1].data == '[[10, 10, 3], "kaith"]'
    assert responses[2] is None

    # A batch made only of the empty request still returns one (None) slot.
    bad_responses = adapter.handle_batch_request([empty_request],
                                                 predict_image_and_json)
    assert len(bad_responses) == 1
    assert bad_responses[0] is None
Example #41
0
    def test_field_encoding(self):
        """Text and bytes field values must all encode to the same body."""
        delimiter = b'--' + b(BOUNDARY)
        expected_body = (delimiter + b'\r\n' +
                         b'Content-Disposition: form-data; name="k"\r\n' +
                         b'\r\n' +
                         b'v\r\n' +
                         delimiter + b'\r\n' +
                         b'Content-Disposition: form-data; name="k2"\r\n' +
                         b'\r\n' +
                         b'v2\r\n' +
                         delimiter + b'--\r\n')
        expected_type = 'multipart/form-data; boundary=' + str(BOUNDARY)

        # Same two fields, with the key/value types varied between variants.
        variants = (
            [('k', 'v'), ('k2', 'v2')],
            [('k', b'v'), (u('k2'), b'v2')],
            [('k', b'v'), (u('k2'), 'v2')],
        )

        for fields in variants:
            encoded, content_type = encode_multipart_formdata(fields, boundary=BOUNDARY)
            self.assertEqual(encoded, expected_body, fields)
            self.assertEqual(content_type, expected_type)
Example #42
0
def _upload_to_dataset_local(connector, host, clowder_user, clowder_pass,
                             datasetid, filepath):
    """Upload file POINTER to existing Clowder dataset. Does not copy actual file bytes.

    Keyword arguments:
    connector -- connector information, used to get missing parameters and send status updates
    host -- the clowder host, including http and port, should end with a /
    key -- the secret key to login to clowder
    datasetid -- the dataset that the file should be associated with
    filepath -- path to file
    """

    logger = logging.getLogger(__name__)
    url = '%sapi/uploadToDataset/%s' % (host, datasetid)

    if os.path.exists(filepath):
        # Replace local path with remote path before uploading
        for source_path in connector.mounted_paths:
            if filepath.startswith(connector.mounted_paths[source_path]):
                filepath = filepath.replace(
                    connector.mounted_paths[source_path], source_path)
                break

        (content, header) = encode_multipart_formdata([
            ("file", '{"path":"%s"}' % filepath)
        ])
        result = connector.post(url,
                                data=content,
                                headers={'Content-Type': header},
                                auth=(clowder_user, clowder_pass))

        uploadedfileid = result.json()['id']
        logger.debug("uploaded file id = [%s]", uploadedfileid)

        return uploadedfileid
    else:
        logger.error("unable to upload local file %s (not found)", filepath)
Example #43
0
 def from_data(self, image_data, filetype=None, fileext='png'):
     """Build the multipart POST request that uploads ``image_data``.

     The image is sent as the ``upfile`` form part; a fixed set of query
     parameters is appended to ``self.search_image_url`` and
     ``self.cookies`` is attached to the returned ``Request``.
     """
     # Without an explicit filetype the content-type is guessed from the
     # file extension.
     upload = (('image.bin', image_data, filetype) if filetype
               else ('image.%s' % fileext, image_data))

     form_multipart = OrderedDict()
     form_multipart['upfile'] = upload
     # Not sent at present:
     #   ('format', 'json')
     #   ('request', '[{"block":"b-page_type_search-by-image__link"}]')
     form_multipart['rpt'] = 'imageview'

     form_urlencoded = OrderedDict()
     # some fake browser info
     form_urlencoded['uinfo'] = 'sw-1440-sh-900-ww-1440-wh-775-pd-1-wp-16x10_1440x900'
     form_urlencoded['rpt'] = 'imageview'
     qstring = '?' + urlencode(form_urlencoded)

     body, content_type = encode_multipart_formdata(form_multipart, boundary=None)
     return Request(
         self.search_image_url + qstring,
         method='POST',
         body=body,
         headers={
             b'Content-Type': content_type,
             b'X-Requested-With': b'XMLHttpRequest',
             b'DNT': b'1',
         },
         cookies=self.cookies,
     )
Example #44
0
File: urllib3.py Project: ad-m/grab
    def process_config(self, grab):
        """Build the outgoing ``Request`` from ``grab.config`` and keep it on ``self._request``.

        Raises:
            error.GrabInvalidUrl: ``normalize_url`` failed on the configured URL.
            GrabMisuseError: file body storage is requested without
                ``body_storage_dir``, ``multipart_post`` is unicode text, or a
                POST/PUT request has neither ``post`` nor ``multipart_post``.
        """
        request = Request(data=None)

        try:
            normalized = normalize_url(grab.config['url'])
        except Exception as ex:
            raise error.GrabInvalidUrl(
                u'%s: %s' % (six.text_type(ex), grab.config['url']))
        request.url = normalized

        method = grab.detect_request_method()
        request.method = make_str(method)

        # The `nobody` option overrides the configured body size limit.
        request.body_maxsize = grab.config['body_maxsize']
        if grab.config['nobody']:
            request.body_maxsize = 0

        request.timeout = grab.config['timeout']
        request.connect_timeout = grab.config['connect_timeout']

        # Headers computed here; user-supplied headers are merged in later
        # and therefore win on conflicts.
        headers = {}

        # Body processing: prepare a storage file unless the body is kept
        # in memory.
        if not grab.config['body_inmemory']:
            if not grab.config['body_storage_dir']:
                raise GrabMisuseError(
                    'Option body_storage_dir is not defined')
            body_file, body_path = self.setup_body_file(
                grab.config['body_storage_dir'],
                grab.config['body_storage_filename'],
                create_dir=grab.config['body_storage_create_dir'])
            request._response_file = body_file
            request._response_path = body_path

        # Request payload: `multipart_post` is checked before `post`.
        if grab.config['multipart_post'] is not None:
            payload = grab.config['multipart_post']
            # Raw bytes are sent untouched; anything else gets encoded.
            if not isinstance(payload, six.binary_type):
                if isinstance(payload, six.text_type):
                    raise GrabMisuseError(
                        'Option multipart_post data does not accept unicode.')
                items = normalize_http_values(
                    grab.config['multipart_post'],
                    charset=grab.config['charset'],
                    ignore_classes=(UploadFile, UploadContent),
                )
                items = decode_pairs(items, grab.config['charset'])
                items = process_upload_items(items)
                payload, content_type = encode_multipart_formdata(items)
                headers['Content-Type'] = content_type
            headers['Content-Length'] = len(payload)
            request.data = payload
        elif grab.config['post'] is not None:
            payload = normalize_post_data(grab.config['post'],
                                          grab.config['charset'])
            headers['Content-Length'] = len(payload)
            request.data = payload

        if (method in ('POST', 'PUT')
                and grab.config['post'] is None
                and grab.config['multipart_post'] is None):
            raise GrabMisuseError(
                'Neither `post` or `multipart_post` options was specified'
                ' for the %s request' % method)

        # Proxy
        if grab.config['proxy']:
            request.proxy = grab.config['proxy']

        if grab.config['proxy_userpwd']:
            request.proxy_userpwd = grab.config['proxy_userpwd']

        request.proxy_type = grab.config['proxy_type'] or 'http'

        # User-Agent: picked once and written back into the config.
        if grab.config['user_agent'] is None:
            if grab.config['user_agent_file'] is None:
                grab.config['user_agent'] = generate_user_agent()
            else:
                with open(grab.config['user_agent_file']) as inf:
                    candidates = inf.read().splitlines()
                grab.config['user_agent'] = random.choice(candidates)

        headers['User-Agent'] = grab.config['user_agent']

        # Headers
        headers.update(grab.config['common_headers'])
        if grab.config['headers']:
            headers.update(grab.config['headers'])
        request.headers = headers

        # Cookies
        self.process_cookie_options(grab, request)

        self._request = request
Example #45
0
 def test_input_datastructures(self, fields):
     """The encoded body must contain the boundary marker exactly three times."""
     payload, _content_type = encode_multipart_formdata(fields, boundary=BOUNDARY)
     marker = b(BOUNDARY)
     assert payload.count(marker) == 3
Example #46
0
    def process_config(self, grab):
        """Build the outgoing ``Request`` from ``grab.config`` and keep it on ``self._request``.

        Raises:
            error.GrabInvalidUrl: ``normalize_url`` failed on the configured URL.
            GrabMisuseError: file body storage is requested without
                ``body_storage_dir``, ``multipart_post`` is unicode text, or a
                POST/PUT request has neither ``post`` nor ``multipart_post``.
        """
        request = Request(data=None)

        try:
            normalized = normalize_url(grab.config["url"])
        except Exception as ex:
            raise error.GrabInvalidUrl(u"%s: %s" % (six.text_type(ex), grab.config["url"]))
        request.url = normalized

        method = grab.detect_request_method()
        request.method = make_str(method)

        # The `nobody` option overrides the configured body size limit.
        request.body_maxsize = grab.config["body_maxsize"]
        if grab.config["nobody"]:
            request.body_maxsize = 0

        request.timeout = grab.config["timeout"]
        request.connect_timeout = grab.config["connect_timeout"]

        # Headers computed here; user-supplied headers are merged in later
        # and therefore win on conflicts.
        headers = {}

        # Body processing: prepare a storage file unless the body is kept
        # in memory.
        if not grab.config["body_inmemory"]:
            if not grab.config["body_storage_dir"]:
                raise GrabMisuseError("Option body_storage_dir is not defined")
            body_file, body_path = self.setup_body_file(
                grab.config["body_storage_dir"],
                grab.config["body_storage_filename"],
                create_dir=grab.config["body_storage_create_dir"],
            )
            request._response_file = body_file
            request._response_path = body_path

        # Request payload: `multipart_post` is checked before `post`.
        if grab.config["multipart_post"] is not None:
            payload = grab.config["multipart_post"]
            # Raw bytes are sent untouched; anything else gets encoded.
            if not isinstance(payload, six.binary_type):
                if isinstance(payload, six.text_type):
                    raise GrabMisuseError("Option multipart_post data does not accept unicode.")
                items = normalize_http_values(
                    grab.config["multipart_post"],
                    charset=grab.config["charset"],
                    ignore_classes=(UploadFile, UploadContent),
                )
                items = decode_pairs(items, grab.config["charset"])
                items = process_upload_items(items)
                payload, content_type = encode_multipart_formdata(items)
                headers["Content-Type"] = content_type
            headers["Content-Length"] = len(payload)
            request.data = payload
        elif grab.config["post"] is not None:
            payload = normalize_post_data(grab.config["post"], grab.config["charset"])
            headers["Content-Length"] = len(payload)
            request.data = payload

        payload_required = method in ("POST", "PUT")
        if payload_required and grab.config["post"] is None and grab.config["multipart_post"] is None:
            raise GrabMisuseError(
                "Neither `post` or `multipart_post` options was specified for the %s request" % method
            )

        # Proxy
        if grab.config["proxy"]:
            request.proxy = grab.config["proxy"]

        if grab.config["proxy_userpwd"]:
            request.proxy_userpwd = grab.config["proxy_userpwd"]

        request.proxy_type = grab.config["proxy_type"] or "http"

        # User-Agent: picked once and written back into the config.
        if grab.config["user_agent"] is None:
            if grab.config["user_agent_file"] is None:
                grab.config["user_agent"] = generate_user_agent()
            else:
                with open(grab.config["user_agent_file"]) as inf:
                    candidates = inf.read().splitlines()
                grab.config["user_agent"] = random.choice(candidates)

        headers["User-Agent"] = grab.config["user_agent"]

        # Headers
        headers.update(grab.config["common_headers"])
        if grab.config["headers"]:
            headers.update(grab.config["headers"])
        request.headers = headers

        # Cookies
        self.process_cookie_options(grab, request)

        self._request = request
Example #47
0
    def _encode_files(files, data):
        """Build the body for a multipart/form-data request.

        Will successfully encode files when passed as a dict or a list of
        tuples. Order is retained if data is a list of tuples but arbitrary
        if parameters are supplied as a dict.
        The tuples may be 2-tuples (filename, fileobj), 3-tuples (filename, fileobj, contentype)
        or 4-tuples (filename, fileobj, contentype, custom_headers).
        """
        if (not files):
            raise ValueError("Files must be provided.")
        elif isinstance(data, basestring):
            raise ValueError("data must not be a string.")

        new_fields = []
        fields = to_key_val_list(data or {})
        files = to_key_val_list(files or {})

        for field, val in fields:
            if isinstance(val, basestring) or not hasattr(val, '__iter__'):
                val = [val]
            for v in val:
                if v is not None:
                    # Don't call str() on bytestrings: in Py3 it all goes wrong.
                    if not isinstance(v, bytes):
                        v = str(v)

                    new_fields.append(
                        (field.decode('utf-8') if isinstance(field, bytes) else field,
                         v.encode('utf-8') if isinstance(v, str) else v))

        for (k, v) in files:
            # support for explicit filename
            ft = None
            fh = None
            if isinstance(v, (tuple, list)):
                if len(v) == 2:
                    fn, fp = v
                elif len(v) == 3:
                    fn, fp, ft = v
                else:
                    fn, fp, ft, fh = v
            else:
                fn = guess_filename(v) or k
                fp = v

            if isinstance(fp, (str, bytes, bytearray)):
                fdata = fp
            elif hasattr(fp, 'read'):
                fdata = fp.read()
            elif fp is None:
                continue
            else:
                fdata = fp

            rf = RequestField(name=k, data=fdata, filename=fn, headers=fh)
            rf.make_multipart(content_type=ft)
            new_fields.append(rf)

        body, content_type = encode_multipart_formdata(new_fields)

        return body, content_type
def notifyClowderOfCompletedTask(task):
    """Send the files and metadata listed in a completed task to Clowder.

    The task's ``user`` is looked up in ``config['clowder']['user_map']`` to
    obtain Clowder credentials. For each entry of ``task['contents']`` the
    function collects files that have no usable ``clowder_id`` yet, posts
    dataset metadata (from an ``md`` entry or a ``metadata.json`` file) and
    then posts the remaining file paths to the dataset. Progress is recorded
    on a copy of the task that is written to the database after each step.

    Returns:
        ``allDone`` (True unless a dataset id could not be found/created)
        when the user has credentials; False immediately if a metadata or
        file POST returns a non-200 status, or if the user has no mapping.
    """
    # Verify that globus user has a mapping to clowder credentials in config file
    globUser = task['user']
    userMap = config['clowder']['user_map']

    if globUser in userMap:
        logger.info("%s task complete; notifying Clowder" % task['globus_id'], extra={
            "globus_id": task['globus_id'],
            "action": "NOTIFYING CLOWDER OF COMPLETION"
        })
        clowderHost = config['clowder']['host']
        clowderUser = userMap[globUser]['clowder_user']
        clowderPass = userMap[globUser]['clowder_pass']

        sess = requests.Session()
        sess.auth = (clowderUser, clowderPass)

        # This will be false if any files in the task have errors; task will be revisited
        allDone = True

        # Prepare upload object with all file(s) found
        updatedTask = safeCopy(task)

        # NOTE(review): 'space_id' is read as a key of task['contents'], yet the
        # loop below iterates every key of task['contents'] as a dataset name.
        # If 'space_id' is ever present it would be visited as `ds` too - confirm.
        spaceoverride = task['contents']['space_id'] if 'space_id' in task['contents'] else None
        for ds in task['contents']:
            filesQueued = []      # (path, extra-json-fragment) pairs awaiting upload
            fileFormData = []     # multipart ("file", json-string) fields actually sent
            datasetMD = None      # dataset-level metadata to post, if any
            datasetMDFile = False # key of the metadata.json file that supplied datasetMD
            lastFile = None       # last queued file key; used when the response has no 'ids'

            # Assign dataset-level metadata if provided
            if "md" in task['contents'][ds]:
                datasetMD = clean_json_keys(task['contents'][ds]['md'])

            # Add local files to dataset by path
            if 'files' in task['contents'][ds]:
                for f in task['contents'][ds]['files']:
                    fobj = task['contents'][ds]['files'][f]
                    # Only consider files that have not been registered in Clowder yet
                    if 'clowder_id' not in fobj or fobj['clowder_id'] == "" or fobj['clowder_id'] == "FILE NOT FOUND":
                        if os.path.exists(fobj['path']):
                            if f.find("metadata.json") == -1:
                                if 'md' in fobj:
                                    # Use [1,-1] to avoid json.dumps wrapping quotes
                                    # Replace \" with " to avoid json.dumps escaping quotes
                                    mdstr = ', "md":' + json.dumps(fobj['md'])[1:-1].replace('\\"', '"')
                                else:
                                    mdstr = ""
                                filesQueued.append((fobj['path'], mdstr))
                                lastFile = f
                            else:
                                # A metadata.json file replaces any 'md' entry as dataset metadata
                                datasetMD = clean_json_keys(loadJsonFile(fobj['path']))
                                datasetMDFile = f
                        else:
                            logger.info("%s dataset %s lists nonexistent file: %s" % (task['globus_id'], ds, fobj['path']))
                            # NOTE(review): indexed by fobj['name'] while the loop key is `f`;
                            # presumably these are equal - verify against the task schema.
                            updatedTask['contents'][ds]['files'][fobj['name']]['clowder_id'] = "FILE NOT FOUND"
                            writeTaskToDatabase(updatedTask)

            if len(filesQueued)>0 or datasetMD:
                dsid = fetchDatasetByName(ds, sess, spaceoverride)
                # NOTE(review): the file list is fetched before `dsid` is checked for
                # truthiness below - confirm fetchDatasetFileList tolerates a falsy id.
                dsFileList = fetchDatasetFileList(dsid, sess)
                if dsid:
                    # Only send files not already present in dataset by path
                    for queued in filesQueued:
                        alreadyStored = False
                        for storedFile in dsFileList:
                            if queued[0] == storedFile['filepath']:
                                logger.info("- skipping file %s (already uploaded)" % queued[0])
                                alreadyStored = True
                                break
                        if not alreadyStored:
                            # JSON is assembled by hand: {"path":"<path>"<optional , "md":...>}
                            fileFormData.append(("file",'{"path":"%s"%s}' % (queued[0], queued[1])))

                    if datasetMD:
                        # Upload metadata
                        dsmd = sess.post(clowderHost+"/api/datasets/"+dsid+"/metadata",
                                         headers={'Content-Type':'application/json'},
                                         data=json.dumps(datasetMD))

                        if dsmd.status_code != 200:
                            logger.error("- cannot add dataset metadata (%s: %s)" % (dsmd.status_code, dsmd.text))
                            return False
                        else:
                            if datasetMDFile:
                                logger.info("++ added metadata from .json file to dataset %s" % ds, extra={
                                    "dataset_name": ds,
                                    "dataset_id": dsid,
                                    "action": "METADATA ADDED",
                                    "metadata": datasetMD
                                })
                                # Mark the metadata file itself as handled
                                updatedTask['contents'][ds]['files'][datasetMDFile]['metadata_loaded'] = True
                                updatedTask['contents'][ds]['files'][datasetMDFile]['clowder_id'] = "attached to dataset"
                                writeTaskToDatabase(updatedTask)
                            else:
                                # Remove metadata from activeTasks on success even if file upload fails in next step, so we don't repeat md
                                logger.info("++ added metadata to dataset %s" % ds, extra={
                                    "dataset_name": ds,
                                    "dataset_id": dsid,
                                    "action": "METADATA ADDED",
                                    "metadata": datasetMD
                                })
                                del updatedTask['contents'][ds]['md']
                                writeTaskToDatabase(updatedTask)

                    if len(fileFormData)>0:
                        # Upload collected files for this dataset
                        # Boundary encoding from http://stackoverflow.com/questions/17982741/python-using-reuests-library-for-multipart-form-data
                        logger.info("%s uploading unprocessed files belonging to %s" % (task['globus_id'], ds), extra={
                            "dataset_id": dsid,
                            "dataset_name": ds,
                            "action": "UPLOADING FILES",
                            "filelist": fileFormData
                        })

                        (content, header) = encode_multipart_formdata(fileFormData)
                        fi = sess.post(clowderHost+"/api/uploadToDataset/"+dsid,
                                       headers={'Content-Type':header},
                                       data=content)

                        if fi.status_code != 200:
                            logger.error("- cannot upload files (%s - %s)" % (fi.status_code, fi.text))
                            return False
                        else:
                            loaded = fi.json()
                            if 'ids' in loaded:
                                # Response lists several files: record each returned id by name
                                for fobj in loaded['ids']:
                                    logger.info("++ added file %s" % fobj['name'])
                                    updatedTask['contents'][ds]['files'][fobj['name']]['clowder_id'] = fobj['id']
                                    writeTaskToDatabase(updatedTask)
                            else:
                                # Response carries a single 'id': attribute it to the last queued file
                                logger.info("++ added file %s" % lastFile)
                                updatedTask['contents'][ds]['files'][lastFile]['clowder_id'] = loaded['id']
                                writeTaskToDatabase(updatedTask)
                else:
                    logger.error("- dataset id for %s could not be found/created" % ds)
                    allDone = False
        return allDone
    else:
        logger.error("- cannot find clowder user credentials for Globus user %s" % globUser)
        return False
Example #49
0
    def encode_params(self, data=None, files=None, **kwargs):
        """
        Assemble a multipart/form-data body from ``data`` fields and ``files``.

        Both arguments may be a dict or a list of tuples; list order is kept,
        dict order is whatever the dict yields. A data value may be a
        2-item tuple/list ``(value, content_type)``. A file entry may be a
        bare value (handed to ``guess_filename_stream``), or a tuple of
        2 (filename, fileobj), 3 (filename, fileobj, content_type) or
        4 (filename, fileobj, content_type, custom_headers) items.

        Optional keyword arguments: ``boundary`` (defaults to
        ``self.boundary``) and ``output_str`` (defaults to
        ``self.output_str``). Returns ``(body, content_type)``.
        """
        if isinstance(data, basestring):
            raise ValueError("Data must not be a string.")

        # optional args
        boundary = kwargs.get("boundary", None)
        output_str = kwargs.get("output_str", self.output_str)

        parts = []
        data_pairs = to_key_val_list(data or {})
        file_pairs = to_key_val_list(files or {})

        for field, value in data_pairs:
            # A 2-item tuple/list is read as (value, content_type).
            if isinstance(value, (tuple, list)) and len(value) == 2:
                entries, ctype = value
            else:
                entries, ctype = value, None

            # Treat a scalar (or a string) as a one-element sequence.
            if isinstance(entries, basestring) or not hasattr(entries, '__iter__'):
                entries = [entries]

            for entry in entries:
                # Don't call str() on bytestrings: in Py3 it all goes wrong.
                if not isinstance(entry, bytes):
                    entry = to_string(entry, lang=output_str)

                if isinstance(field, bytes):
                    field = field.decode('utf-8')
                if isinstance(entry, str):
                    entry = entry.encode('utf-8')

                part = RequestField(name=field, data=entry)
                part.make_multipart(content_type=ctype)
                parts.append(part)

        for part_name, spec in file_pairs:
            content_type = None
            custom_headers = None
            if not isinstance(spec, (tuple, list)):
                # Bare value: let the helpers work out name, stream and type.
                filename, source = guess_filename_stream(spec)
                content_type = guess_content_type(filename)
            elif len(spec) == 2:
                filename, source = spec
            elif len(spec) == 3:
                filename, source, content_type = spec
            else:
                filename, source, content_type, custom_headers = spec

            is_raw = isinstance(source, (str, bytes, bytearray))
            contents = source if is_raw else source.read()

            part = RequestField(name=part_name, data=contents,
                                filename=filename, headers=custom_headers)
            part.make_multipart(content_type=content_type)
            parts.append(part)

        if boundary is None:
            boundary = self.boundary
        body, content_type = encode_multipart_formdata(parts, boundary=boundary)

        return body, content_type
Example #50
0
def submitGroupToClowder(group):
    """Create collection/dataset if needed and post files/metadata to it.

    Keys read from ``group``: ``sensor``, ``date`` (split on ``-`` to derive
    the year and month collection names), ``snapshot``, ``timestamp``,
    ``metadata``, ``files`` and ``file_md``.

    Collections are looked up (and nested when freshly created) as
    sensor > year > month, plus a date level when the dataset is named after
    a snapshot or a timestamp. When both are None the dataset is named after
    the date itself and no date collection is used.

    Dataset metadata, if any, is posted as JSON-LD. Files are then submitted
    by path in a single multipart request. Progress and upload failures are
    reported with ``print``; nothing is returned.
    """
    c_sensor = group['sensor']
    c_date = c_sensor + " - " + group['date']
    date_parts = group['date'].split('-')
    c_year = c_sensor + " - " + date_parts[0]
    c_month = c_year + "-" + date_parts[1]

    # Space is organized per-site, will just hardcode these for now
    if c_sensor == "ddpscIndoorSuite":
        c_space = "571fbfefe4b032ce83d96006"
        c_user = "******"
        c_user_id = "5808d84864f4455cbe16f6d1"
        c_pass = ""
        c_context = "https://terraref.ncsa.illinois.edu/metadata/danforth#"
    else:
        c_space = "571fb3e1e4b032ce83d95ecf"
        c_user = "******"
        c_user_id = "57adcb81c0a7465986583df1"
        c_pass = ""
        c_context = "https://terraref.ncsa.illinois.edu/metadata/uamac#"

    sess = requests.Session()
    sess.auth = (c_user, c_pass)

    print(c_sensor + " | " + c_year + " | " + c_month + " | " + c_date)

    id_sensor = fetchCollectionByName(c_sensor, c_space, sess)
    id_year = fetchCollectionByName(c_year, c_space, sess)
    id_month = fetchCollectionByName(c_month, c_space, sess)
    # Nest new collections if necessary
    if id_year['created']:
        associateChildCollection(id_sensor['id'], id_year['id'], sess)
    if id_month['created']:
        associateChildCollection(id_year['id'], id_month['id'], sess)

    # Pick the dataset name and decide whether a date-level collection sits
    # between the month collection and the dataset.
    if group['snapshot'] is not None:
        # Danforth uses Snapshot as dataset
        dataset_suffix = group['snapshot']
        use_date_collection = True
    elif group['timestamp'] is None:
        # Some have the date level as the dataset, not a collection
        dataset_suffix = group['date']
        use_date_collection = False
    else:
        dataset_suffix = group['timestamp']
        use_date_collection = True
    c_dataset = c_sensor + " - " + dataset_suffix

    date_collection_id = None
    if use_date_collection:
        id_date = fetchCollectionByName(c_date, c_space, sess)
        if id_date["created"]:
            associateChildCollection(id_month['id'], id_date['id'], sess)
        date_collection_id = id_date["id"]

    id_dataset = fetchDatasetByName(c_dataset, c_space, id_sensor["id"],
                                    id_year["id"], id_month["id"],
                                    date_collection_id, sess)

    # Perform actual posts
    if id_dataset:
        if group['metadata']:
            md = {
                "@context": [
                    "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                    {
                        "@vocab": c_context
                    }
                ],
                "content": group['metadata'],
                "agent": {
                    "@type": "cat:user",
                    "user_id":
                    "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" %
                    c_user_id
                }
            }
            # NOTE: the response of this post is not inspected, so the
            # message below is printed whether or not the server accepted it.
            sess.post(clowderURL + "/api/datasets/" + id_dataset +
                      "/metadata.jsonld",
                      headers={'Content-Type': 'application/json'},
                      data=json.dumps(md))
            print("++++ added metadata to %s (%s)" % (c_dataset, id_dataset))

        fileFormData = []
        for f in group['files']:
            # Per-file metadata is looked up under the original path, before
            # any path rewriting below.
            fmd = group['file_md'][f] if f in group['file_md'] else None
            # The metadata is spliced into a hand-built JSON object; \" is
            # replaced with " to undo the quote escaping done by json.dumps.
            if fmd:
                mdstr = ', "md":' + json.dumps(fmd).replace('\\"', '"')
            else:
                mdstr = ""
            if "/gpfs/largeblockFS/" in f:
                f = f.replace("/gpfs/largeblockFS/projects/arpae/terraref/",
                              "/home/clowder/")

            fileFormData.append(("file", '{"path":"%s"%s}' % (f, mdstr)))

        if fileFormData:
            (content, header) = encode_multipart_formdata(fileFormData)
            fi = sess.post(clowderURL + "/api/uploadToDataset/" + id_dataset,
                           headers={'Content-Type': header},
                           data=content)

            if fi.status_code == 200:
                print("++++ added files to %s (%s)" % (c_dataset, id_dataset))
            else:
                print(fi.status_code)
                # requests responses expose the textual status as `reason`;
                # there is no `status_message` attribute.
                print(fi.reason)
Example #51
0
    def process_config(self, grab):
        """Build a ``Request`` from ``grab.config`` and store it on ``self._request``.

        The request receives the normalized URL, the detected HTTP method,
        body-size and timeout limits, an optional on-disk response target,
        the POST payload (multipart or plain), proxy settings and the final
        header set. Cookie handling is delegated to
        ``self.process_cookie_options``.

        Side effect: when ``grab.config['user_agent']`` is None it is filled
        in, either with a random line of ``user_agent_file`` or with
        ``generate_user_agent()``.

        Raises:
            error.GrabInvalidUrl: ``normalize_url`` failed on the configured
                URL.
            GrabMisuseError: the body must go to disk but no
                ``body_storage_dir`` is set; ``multipart_post`` is a unicode
                string; or a POST/PUT request has neither ``post`` nor
                ``multipart_post`` data.
        """
        request = Request(data=None)

        try:
            normalized_url = normalize_url(grab.config['url'])
        except Exception as ex:
            reason = six.text_type(ex)
            shown_url = make_unicode(grab.config['url'], errors='ignore')
            raise error.GrabInvalidUrl(u'%s: %s' % (reason, shown_url))
        else:
            request.url = normalized_url

        method = grab.detect_request_method()
        request.method = make_str(method)

        request.config_body_maxsize = grab.config['body_maxsize']
        request.config_nobody = grab.config['nobody']

        request.timeout = grab.config['timeout']
        request.connect_timeout = grab.config['connect_timeout']

        # Headers derived from the config; user-supplied ones are merged on
        # top at the end.
        headers = {}

        # Response body goes to a file unless it is kept in memory.
        if not grab.config['body_inmemory']:
            if not grab.config['body_storage_dir']:
                raise GrabMisuseError('Option body_storage_dir is not defined')
            body_file, body_path = self.setup_body_file(
                grab.config['body_storage_dir'],
                grab.config['body_storage_filename'],
                create_dir=grab.config['body_storage_create_dir'])
            request.response_file = body_file
            request.response_path = body_path

        # Request payload: multipart data wins over plain post data.
        multipart = grab.config['multipart_post']
        if multipart is not None:
            if isinstance(multipart, six.text_type):
                raise GrabMisuseError('Option multipart_post data'
                                      ' does not accept unicode.')
            if isinstance(multipart, six.binary_type):
                # Already an encoded body: sent as is, no Content-Type added.
                payload = multipart
            else:
                items = normalize_http_values(
                    grab.config['multipart_post'],
                    charset=grab.config['charset'],
                    ignore_classes=(UploadFile, UploadContent),
                )
                items = decode_pairs(items, grab.config['charset'])
                items = process_upload_items(items)
                payload, multipart_type = encode_multipart_formdata(items)
                headers['Content-Type'] = multipart_type
            headers['Content-Length'] = len(payload)
            request.data = payload
        elif grab.config['post'] is not None:
            payload = normalize_post_data(grab.config['post'],
                                          grab.config['charset'])
            headers['Content-Length'] = len(payload)
            request.data = payload

        if (method in ('POST', 'PUT')
                and grab.config['post'] is None
                and grab.config['multipart_post'] is None):
            raise GrabMisuseError('Neither `post` or `multipart_post`'
                                  ' options was specified for the %s'
                                  ' request' % method)

        # Proxy: only truthy options are copied onto the request.
        for option in ('proxy', 'proxy_userpwd'):
            if grab.config[option]:
                setattr(request, option, grab.config[option])
        request.proxy_type = grab.config['proxy_type'] or 'http'

        # User-Agent: resolved once and written back into the config.
        if grab.config['user_agent'] is None:
            if grab.config['user_agent_file'] is None:
                grab.config['user_agent'] = generate_user_agent()
            else:
                with open(grab.config['user_agent_file']) as inf:
                    candidates = inf.read().splitlines()
                grab.config['user_agent'] = random.choice(candidates)
        headers['User-Agent'] = grab.config['user_agent']

        # Later updates override earlier keys: derived < common < explicit.
        headers.update(grab.config['common_headers'])
        if grab.config['headers']:
            headers.update(grab.config['headers'])
        request.headers = headers

        # Cookies
        self.process_cookie_options(grab, request)

        self._request = request
Example #52
0
    def request(self, method, url, query_params=None, headers=None,
                      body=None, post_params=None, _preload_content=True, _request_timeout=None):
        """Build an HTTP request, send it via ``self.pool_manager`` and wrap the reply.

        This is a generator function: it yields the pending fetch and returns
        the wrapped response, so it is presumably driven by a coroutine
        decorator that is not visible here.

        :param method: http request method
        :param url: http request url
        :param query_params: query parameters in the url
        :param headers: http request headers; when omitted or empty, a
                        ``Content-Type: application/json`` header is used.
                        A non-empty mapping is updated in place when it
                        lacks ``Content-Type``.
        :param body: request json body, for `application/json`
        :param post_params: request post parameters,
                            `application/x-www-form-urlencoded`
                            and `multipart/form-data`
        :param _preload_content: accepted for interface compatibility; not
                                 read by this method.
        :param _request_timeout: timeout setting for this request; defaults
                                 to 5 minutes when falsy.
        :raises AssertionError: if ``method`` is not a supported HTTP verb.
        :raises ValueError: if both ``body`` and ``post_params`` are given.
        :raises ApiException: if no body can be prepared for the declared
                              content type, or the response status is not
                              2xx.
        """
        method = method.upper()
        assert method in ['GET', 'HEAD', 'DELETE', 'POST', 'PUT', 'PATCH', 'OPTIONS']

        if post_params and body:
            raise ValueError(
                "body parameter cannot be used with post_params parameter."
            )

        # Normalize headers up front: the default headers=None used to fail
        # on the membership test below, and an empty mapping failed later on
        # headers['Content-Type'].
        if not headers:
            headers = {}
        if 'Content-Type' not in headers:
            headers['Content-Type'] = 'application/json'

        request = HTTPRequest(url)
        request.ssl_context = self.ssl_context
        request.proxy_host = self.proxy_host
        request.proxy_port = self.proxy_port
        request.method = method
        request.request_timeout = _request_timeout or 5 * 60

        post_params = post_params or {}

        if query_params:
            request.url += '?' + urlencode(query_params)

        # For `POST`, `PUT`, `PATCH`, `OPTIONS`, `DELETE`
        if method in ['POST', 'PUT', 'PATCH', 'OPTIONS', 'DELETE']:
            content_type = headers['Content-Type']
            if re.search('json', content_type, re.IGNORECASE):
                if body:
                    body = json.dumps(body)
                request.body = body
            elif content_type == 'application/x-www-form-urlencoded':
                request.body = urlencode(post_params)
            elif content_type == 'multipart/form-data':
                # encode_multipart_formdata returns (body, content_type); the
                # returned content type carries the boundary, so it has to
                # replace the bare 'multipart/form-data' header value.
                request.body, headers['Content-Type'] = \
                    encode_multipart_formdata(post_params)
            # Pass a `bytes` parameter directly in the body to support
            # other content types than Json when `body` argument is provided
            # in serialized form
            elif isinstance(body, bytes):
                request.body = body
            else:
                # Cannot generate the request from given parameters
                msg = """Cannot prepare a request message for provided arguments.
                Please check that your arguments match declared content type."""
                raise ApiException(status=0, reason=msg)

        # Assigned last so the request sees the final Content-Type.
        request.headers = headers

        r = yield self.pool_manager.fetch(request)
        r = RESTResponse(r, r.body)

        # log response body
        logger.debug("response body: %s", r.data)

        if not 200 <= r.status <= 299:
            raise ApiException(http_resp=r)

        return r
Example #53
0
 def test_input_datastructures(self, fields: _TYPE_FIELDS) -> None:
     """Encode ``fields`` with the fixed ``BOUNDARY`` and check the boundary count in the body."""
     encoded, _ = encode_multipart_formdata(fields, boundary=BOUNDARY)
     # Three occurrences presumably mean one delimiter per field plus the
     # closing delimiter, i.e. every supplied `fields` value holds two
     # fields -- confirm against the test's parametrization.
     assert encoded.count(BOUNDARY_BYTES) == 3