Exemple #1
0
    def test_force_unicode(self):
        """Check force_unicode() on UTF-8 bytes, text and non-string values."""
        snowman_text = 'Hello ☃'

        # UTF-8 encoded bytes are expected to decode to the same text.
        self.assertEqual(force_unicode(b'Hello \xe2\x98\x83'), snowman_text)
        # Text that is already Unicode must come back unmangled.
        self.assertEqual(force_unicode(snowman_text), snowman_text)

        # Non-string inputs are expected to come back as their string form.
        conversions = (
            (1, '1', "force_unicode() should convert ints"),
            (1.0, '1.0', "force_unicode() should convert floats"),
            (None, 'None', 'force_unicode() should convert None'),
        )
        for raw_value, expected, failure_message in conversions:
            self.assertEqual(force_unicode(raw_value), expected, failure_message)
Exemple #2
0
    def test_force_unicode(self):
        """Verify the text produced by force_unicode() for several input types."""
        from_bytes = force_unicode(b'Hello \xe2\x98\x83')
        self.assertEqual(from_bytes, 'Hello ☃')

        # Already-Unicode input should pass through without being mangled.
        from_text = force_unicode('Hello ☃')
        self.assertEqual(from_text, 'Hello ☃')

        from_int = force_unicode(1)
        self.assertEqual(from_int, '1', "force_unicode() should convert ints")

        from_float = force_unicode(1.0)
        self.assertEqual(from_float, '1.0', "force_unicode() should convert floats")

        from_none = force_unicode(None)
        self.assertEqual(from_none, 'None', 'force_unicode() should convert None')
Exemple #3
0
    def test_force_unicode(self):
        """Exercise force_unicode() with bytes, text, int, float and None."""
        expected_text = "Hello ☃"

        # The second input is already Unicode and must not be mangled.
        for text_input in (b"Hello \xe2\x98\x83", expected_text):
            self.assertEqual(force_unicode(text_input), expected_text)

        int_result = force_unicode(1)
        float_result = force_unicode(1.0)
        none_result = force_unicode(None)

        self.assertEqual(int_result, "1", "force_unicode() should convert ints")
        self.assertEqual(float_result, "1.0", "force_unicode() should convert floats")
        self.assertEqual(none_result, "None", "force_unicode() should convert None")
Exemple #4
0
    def test_force_unicode(self):
        """Run force_unicode() against a table of inputs and expected text."""
        # Each case is (input, expected text, optional failure message).
        cases = [
            (b"Hello \xe2\x98\x83", "Hello ☃", None),
            # Don't mangle, it's already Unicode.
            ("Hello ☃", "Hello ☃", None),
            (1, "1", "force_unicode() should convert ints"),
            (1.0, "1.0", "force_unicode() should convert floats"),
            (None, "None", "force_unicode() should convert None"),
        ]

        for given, expected, message in cases:
            actual = force_unicode(given)
            if message is None:
                self.assertEqual(actual, expected)
            else:
                self.assertEqual(actual, expected, message)
Exemple #5
0
 def test_safe_urlencode(self):
     """Check that safe_urlencode() output decodes back to the expected query text."""
     scalar_query = safe_urlencode({"test": "Hello ☃! Helllo world!"})
     self.assertEqual(
         force_unicode(unquote_plus(scalar_query)),
         "test=Hello ☃! Helllo world!",
     )

     # With the second positional argument set to True, list and tuple
     # values are expected to produce one key=value pair per item.
     list_value = ["Hello ☃!", "Helllo world!"]
     tuple_value = ("Hello ☃!", "Helllo world!")
     for sequence_value in (list_value, tuple_value):
         sequence_query = safe_urlencode({"test": sequence_value}, True)
         self.assertEqual(
             force_unicode(unquote_plus(sequence_query)),
             "test=Hello \u2603!&test=Helllo world!",
         )
    def _to_python(self, value):
        """
        Converts values from Solr to native Python values.

        Numbers, lists and tuples are returned unchanged. The strings
        ``'true'`` and ``'false'`` become ``True`` and ``False``. ``str``
        values are passed through ``force_unicode()``; a string in which
        ``DATETIME_REGEX`` finds a match is returned as a
        ``datetime.datetime``. Everything else is returned as-is.
        """
        passthrough_types = (int, list, tuple, float, long, complex)
        if isinstance(value, passthrough_types):
            return value

        if value == 'true':
            return True
        if value == 'false':
            return False

        if isinstance(value, str):
            value = force_unicode(value)

        # Only string values can carry a date; anything else is returned as-is.
        if not isinstance(value, basestring):
            return value

        match = DATETIME_REGEX.search(value)
        if not match:
            return value

        # Every named group of the regex is converted to an integer.
        parts = {}
        for name, text in match.groupdict().items():
            parts[name] = int(text)

        return datetime.datetime(
            parts['year'],
            parts['month'],
            parts['day'],
            parts['hour'],
            parts['minute'],
            parts['second'],
        )
Exemple #7
0
 def put(self, data):
     '''
     Supports partial update of solr.

     Builds an ``<add>`` XML message from ``data`` and sends it through the
     Solr interface obtained for ``self.solr_url``.

     ``data`` is an iterable of dicts. Each dict has an ``'id'`` entry and a
     ``'fields'`` list whose items carry ``'name'``, ``'command'`` and
     ``'value'`` entries.

     Returns ``None``. Nothing is sent when ``data`` is ``None`` or empty.
     An exception raised while sending is logged and re-raised.
     '''
     if not data:
         return
     # Update Solr: (Mostly from pysolr.Solr code.)
     # Generate the exact update command in xml -
     #  <add>
     #   <doc>
     #    <field name="id">1</field>
     #    <field name="memory_used" update="set">832</field>
     #   </doc>
     #  </add>
     data_xml = ET.Element('add')
     for doc_update in data:
         doc_element = ET.SubElement(data_xml, 'doc')
         id_field = ET.SubElement(doc_element, 'field', name='id')
         id_field.text = str(doc_update['id'])
         for field in doc_update['fields']:
             field_xml = ET.SubElement(
                 doc_element,
                 'field',
                 name=field['name'],
                 update=field['command'],
             )
             field_xml.text = str(field['value'])
     # This returns a bytestring.
     data_xml_str = ET.tostring(data_xml, encoding='utf-8')
     # Convert back to Unicode.
     data_xml_str = pysolr.force_unicode(data_xml_str)
     try:
         solr = session.get_solr_interface(self.solr_url)
         solr._update(data_xml_str)
     except Exception:
         # Log with traceback, then let the caller decide how to recover.
         LOG.exception('Failed to add to solr.')
         raise
Exemple #8
0
 def test_safe_urlencode(self):
     """Encode parameters with safe_urlencode() and compare the decoded text."""

     def roundtrip(params, *extra_args):
         # Encode, undo the percent/plus quoting, and normalise to text.
         return force_unicode(unquote_plus(safe_urlencode(params, *extra_args)))

     self.assertEqual(
         roundtrip({'test': 'Hello ☃! Helllo world!'}),
         'test=Hello ☃! Helllo world!')

     # A list value with the extra True argument yields repeated keys.
     self.assertEqual(
         roundtrip({'test': ['Hello ☃!', 'Helllo world!']}, True),
         "test=Hello \u2603!&test=Helllo world!")

     # A tuple value is expected to behave like the list.
     self.assertEqual(
         roundtrip({'test': ('Hello ☃!', 'Helllo world!')}, True),
         "test=Hello \u2603!&test=Helllo world!")
Exemple #9
0
 def test__build_doc_with_sets(self):
     """Check the XML that _build_doc() produces for a set-valued field."""
     tags = {"alpha", "beta"}
     doc = {"id": "doc_1", "title": "Set test doc", "tags": tags}

     element = self.solr._build_doc(doc)
     doc_xml = force_unicode(ElementTree.tostring(element, encoding="utf-8"))

     # Each set member is expected to appear as its own <field> element.
     expected_fragments = (
         '<field name="id">doc_1</field>',
         '<field name="title">Set test doc</field>',
         '<field name="tags">alpha</field>',
         '<field name="tags">beta</field>',
     )
     for fragment in expected_fragments:
         self.assertIn(fragment, doc_xml)

     self.assertEqual(len(doc_xml), 144)
Exemple #10
0
 def test__build_doc(self):
     """Check the XML produced by _build_doc() for a simple document."""
     doc = {
         'id': 'doc_1',
         'title': 'Example doc ☃ 1',
         'price': 12.59,
         'popularity': 10,
     }
     doc_xml = force_unicode(ElementTree.tostring(self.solr._build_doc(doc), encoding='utf-8'))
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn('<field name="title">Example doc ☃ 1</field>', doc_xml)
     self.assertIn('<field name="id">doc_1</field>', doc_xml)
     self.assertEqual(len(doc_xml), 152)
Exemple #11
0
 def test__build_doc(self):
     """Build a document with _build_doc() and inspect the serialised XML."""
     doc = {
         'id': 'doc_1',
         'title': 'Example doc ☃ 1',
         'price': 12.59,
         'popularity': 10,
     }
     doc_xml = force_unicode(ElementTree.tostring(self.solr._build_doc(doc), encoding='utf-8'))
     # Membership assertions use assertIn so a failure shows the XML text.
     self.assertIn('<field name="title">Example doc ☃ 1</field>', doc_xml)
     self.assertIn('<field name="id">doc_1</field>', doc_xml)
     self.assertEqual(len(doc_xml), 152)
Exemple #12
0
 def test__build_doc(self):
     """Serialise the element from _build_doc() and check its contents."""
     doc = {"id": "doc_1", "title": "Example doc ☃ 1", "price": 12.59, "popularity": 10}

     element = self.solr._build_doc(doc)
     xml_bytes = ElementTree.tostring(element, encoding="utf-8")
     doc_xml = force_unicode(xml_bytes)

     # The title contains a non-ASCII character that must survive serialisation.
     for fragment in (
         '<field name="title">Example doc ☃ 1</field>',
         '<field name="id">doc_1</field>',
     ):
         self.assertIn(fragment, doc_xml)

     self.assertEqual(len(doc_xml), 152)
Exemple #13
0
 def test_safe_urlencode(self):
     """Compare decoded safe_urlencode() output for scalar, list and tuple values."""
     expected_repeated = "test=Hello \u2603!&test=Helllo world!"

     scalar_encoded = safe_urlencode({"test": "Hello ☃! Helllo world!"})
     scalar_decoded = force_unicode(unquote_plus(scalar_encoded))
     self.assertEqual(scalar_decoded, "test=Hello ☃! Helllo world!")

     # The extra True argument is expected to expand a list into repeated keys.
     list_encoded = safe_urlencode({"test": ["Hello ☃!", "Helllo world!"]}, True)
     list_decoded = force_unicode(unquote_plus(list_encoded))
     self.assertEqual(list_decoded, expected_repeated)

     # A tuple is expected to be expanded the same way.
     tuple_encoded = safe_urlencode({"test": ("Hello ☃!", "Helllo world!")}, True)
     tuple_decoded = force_unicode(unquote_plus(tuple_encoded))
     self.assertEqual(tuple_decoded, expected_repeated)
Exemple #14
0
 def test__build_doc_with_sets(self):
     """Check the XML produced by _build_doc() when a field value is a set."""
     doc = {
         'id': 'doc_1',
         'title': 'Set test doc',
         'tags': {'alpha', 'beta'},
     }
     doc_xml = force_unicode(ElementTree.tostring(self.solr._build_doc(doc), encoding='utf-8'))
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn('<field name="id">doc_1</field>', doc_xml)
     self.assertIn('<field name="title">Set test doc</field>', doc_xml)
     self.assertIn('<field name="tags">alpha</field>', doc_xml)
     self.assertIn('<field name="tags">beta</field>', doc_xml)
     self.assertEqual(len(doc_xml), 144)
Exemple #15
0
 def test__build_doc_with_sets(self):
     """Build a document whose ``tags`` value is a set and inspect the XML."""
     doc = {
         'id': 'doc_1',
         'title': 'Set test doc',
         'tags': {'alpha', 'beta'},
     }
     doc_xml = force_unicode(ElementTree.tostring(self.solr._build_doc(doc), encoding='utf-8'))
     # Membership assertions use assertIn so a failure shows the XML text.
     self.assertIn('<field name="id">doc_1</field>', doc_xml)
     self.assertIn('<field name="title">Set test doc</field>', doc_xml)
     self.assertIn('<field name="tags">alpha</field>', doc_xml)
     self.assertIn('<field name="tags">beta</field>', doc_xml)
     self.assertEqual(len(doc_xml), 144)
def add(solr,
        docs,
        dsId,
        commit=True,
        boost=None,
        commitWithin="1000",
        waitFlush=None,
        waitSearcher=None):
    """
    Adds or updates documents.

    Requires ``solr``, the object whose ``_build_doc`` method turns each
    document into an XML element.
    Requires ``docs``, which is a list of dictionaries. Each key is the
    field name and each value is the value to index.
    Requires ``dsId``, which is passed through unchanged to ``update``.
    Optionally accepts ``commit``. Default is ``True``.
    Optionally accepts ``boost``. Default is ``None``.
    Optionally accepts ``commitWithin``. Default is ``"1000"``. A falsy
    value leaves the attribute off the ``<add>`` element; any other value
    is converted to a string.
    Optionally accepts ``waitFlush``. Default is ``None``.
    Optionally accepts ``waitSearcher``. Default is ``None``.

    Returns whatever ``update`` returns.

    Usage::
        add(solr, [
                            {
                                "id": "doc_1",
                                "title": "A test document",
                            },
                            {
                                "id": "doc_2",
                                "title": "The Banana: Tasty or Dangerous?",
                            },
                        ], dsId)
    """
    message = ET.Element('add')

    if commitWithin:
        # ElementTree can only serialize string attribute values, so an
        # integer commitWithin would otherwise fail inside ET.tostring().
        message.set('commitWithin', str(commitWithin))

    for doc in docs:
        message.append(solr._build_doc(doc, boost=boost))

    # This returns a bytestring. Ugh.
    m = ET.tostring(message, encoding='utf-8')
    # Convert back to Unicode please.
    m = pysolr.force_unicode(m)
    return update(solr,
                  m,
                  dsId,
                  commit=commit,
                  waitFlush=waitFlush,
                  waitSearcher=waitSearcher)
Exemple #17
0
 def test__build_xml_doc_with_empty_values(self):
     """Check that empty field values are absent from the _build_xml_doc() output."""
     doc = {"id": "doc_1", "title": "", "price": None, "tags": []}

     element = self.solr._build_xml_doc(doc)
     doc_xml = force_unicode(ElementTree.tostring(element, encoding="utf-8"))

     # The empty string, None and empty list must not produce empty elements.
     unwanted_fragments = (
         '<field name="title" />',
         '<field name="price" />',
         '<field name="tags" />',
     )
     for fragment in unwanted_fragments:
         self.assertNotIn(fragment, doc_xml)

     self.assertIn('<field name="id">doc_1</field>', doc_xml)
     self.assertEqual(len(doc_xml), 41)
def add(solr, docs, dsId, commit=True, boost=None, commitWithin="1000", waitFlush=None, waitSearcher=None):
    """
    Adds or updates documents.

    Requires ``solr``, the object whose ``_build_doc`` method turns each
    document into an XML element.
    Requires ``docs``, which is a list of dictionaries. Each key is the
    field name and each value is the value to index.
    Requires ``dsId``, which is passed through unchanged to ``update``.
    Optionally accepts ``commit``. Default is ``True``.
    Optionally accepts ``boost``. Default is ``None``.
    Optionally accepts ``commitWithin``. Default is ``"1000"``. A falsy
    value leaves the attribute off the ``<add>`` element; any other value
    is converted to a string.
    Optionally accepts ``waitFlush``. Default is ``None``.
    Optionally accepts ``waitSearcher``. Default is ``None``.

    Returns whatever ``update`` returns.

    Usage::
        add(solr, [
                            {
                                "id": "doc_1",
                                "title": "A test document",
                            },
                            {
                                "id": "doc_2",
                                "title": "The Banana: Tasty or Dangerous?",
                            },
                        ], dsId)
    """
    message = ET.Element('add')

    if commitWithin:
        # ElementTree can only serialize string attribute values, so an
        # integer commitWithin would otherwise fail inside ET.tostring().
        message.set('commitWithin', str(commitWithin))

    for doc in docs:
        message.append(solr._build_doc(doc, boost=boost))

    # This returns a bytestring. Ugh.
    m = ET.tostring(message, encoding='utf-8')
    # Convert back to Unicode please.
    m = pysolr.force_unicode(m)
    return update(solr, m, dsId, commit=commit, waitFlush=waitFlush, waitSearcher=waitSearcher)
Exemple #19
0
 def test__build_xml_doc_with_empty_values_and_field_updates(self):
     """Check _build_xml_doc() output for empty values combined with field updates."""
     doc = {"id": "doc_1", "title": "", "price": None, "tags": []}
     fieldUpdates = {"title": "set", "tags": "set"}

     element = self.solr._build_xml_doc(doc, fieldUpdates=fieldUpdates)
     xml_bytes = ElementTree.tostring(element, encoding="utf-8")
     doc_xml = force_unicode(xml_bytes)

     # Empty fields that have an update command are expected as null="true"
     # elements; the empty field without one ("price") is expected to be absent.
     title_fragment = '<field name="title" null="true" update="set" />'
     tags_fragment = '<field name="tags" null="true" update="set" />'

     self.assertIn(title_fragment, doc_xml)
     self.assertNotIn('<field name="price" />', doc_xml)
     self.assertIn(tags_fragment, doc_xml)
     self.assertIn('<field name="id">doc_1</field>', doc_xml)
     self.assertEqual(len(doc_xml), 134)
Exemple #20
0
    def _send_request(self,
                      method,
                      path='',
                      body=None,
                      headers=None,
                      files=None):
        """
        Send an HTTP request to Solr and return the response body as text.

        Based on the base (pysolr version 3.2.0) _send_request() method; the
        resp = requests_method() line additionally passes along the auth
        information (``self.auth``).

        ``method`` is lower-cased and looked up as an attribute of
        ``self.session``.
        ``path`` is handed to ``self._create_full_url()`` to build the URL.
        ``body`` is converted with ``pysolr.force_bytes()`` before sending,
        unless it is ``None``.
        ``headers`` and ``files`` are forwarded to the session method.

        Returns ``resp.content`` passed through ``pysolr.force_unicode()``.

        Raises ``pysolr.SolrError`` when the session has no attribute named
        ``method``, when the request times out, when the connection fails,
        or when the response status is not 200.
        """
        url = self._create_full_url(path)
        method = method.lower()
        log_body = body

        if headers is None:
            headers = {}

        if log_body is None:
            log_body = ''
        elif not isinstance(log_body, str):
            log_body = repr(body)

        self.log.debug("Starting request to '%s' (%s) with body '%s'...", url,
                       method, log_body[:10])
        start_time = time.time()

        try:
            # No default here: a default would suppress the AttributeError
            # and hand back a non-callable value instead of a session method.
            requests_method = getattr(self.session, method)
        except AttributeError:
            err = "Unable to send HTTP method '{0}'.".format(method)
            raise pysolr.SolrError(err)

        try:
            bytes_body = body

            if bytes_body is not None:
                bytes_body = pysolr.force_bytes(body)

            resp = requests_method(url,
                                   data=bytes_body,
                                   headers=headers,
                                   files=files,
                                   timeout=self.timeout,
                                   auth=self.auth)
        except requests.exceptions.Timeout as err:
            error_message = "Connection to server '%s' timed out: %s"
            self.log.error(error_message, url, err, exc_info=True)
            raise pysolr.SolrError(error_message % (url, err))
        except requests.exceptions.ConnectionError as err:
            error_message = "Failed to connect to server at '%s', are you " \
                            "sure that URL is correct? Checking it in a " \
                            "browser might help: %s"
            params = (url, err)
            self.log.error(error_message, *params, exc_info=True)
            raise pysolr.SolrError(error_message % params)

        end_time = time.time()
        self.log.info("Finished '%s' (%s) with body '%s' in %0.3f seconds.",
                      url, method, log_body[:10], end_time - start_time)

        if int(resp.status_code) != 200:
            error_message = self._extract_error(resp)
            # Attach the raw response details to the log record.
            data = {
                'data': {
                    'headers': resp.headers,
                    'response': resp.content
                }
            }
            self.log.error(error_message, extra=data)
            raise pysolr.SolrError(error_message)

        return pysolr.force_unicode(resp.content)
    def _send_request(self, method, path='', body=None, headers=None,
                      files=None):
        """
        Send an HTTP request to Solr and return the response body as text.

        Based on the base (pysolr version 3.2.0) _send_request() method; the
        resp = requests_method() line additionally passes along the auth
        information (``self.auth``).

        ``method`` is lower-cased and looked up as an attribute of
        ``self.session``.
        ``path`` is handed to ``self._create_full_url()`` to build the URL.
        ``body`` is converted with ``pysolr.force_bytes()`` before sending,
        unless it is ``None``.
        ``headers`` and ``files`` are forwarded to the session method.

        Returns ``resp.content`` passed through ``pysolr.force_unicode()``.

        Raises ``pysolr.SolrError`` when the session has no attribute named
        ``method``, when the request times out, when the connection fails,
        or when the response status is not 200.
        """
        url = self._create_full_url(path)
        method = method.lower()
        log_body = body

        if headers is None:
            headers = {}

        if log_body is None:
            log_body = ''
        elif not isinstance(log_body, str):
            log_body = repr(body)

        self.log.debug("Starting request to '%s' (%s) with body '%s'...", url,
                       method, log_body[:10])
        start_time = time.time()

        try:
            # No default here: a default would suppress the AttributeError
            # and hand back a non-callable value instead of a session method.
            requests_method = getattr(self.session, method)
        except AttributeError:
            err = "Unable to send HTTP method '{0}'.".format(method)
            raise pysolr.SolrError(err)

        try:
            bytes_body = body

            if bytes_body is not None:
                bytes_body = pysolr.force_bytes(body)

            resp = requests_method(url, data=bytes_body, headers=headers,
                                   files=files, timeout=self.timeout,
                                   auth=self.auth)
        except requests.exceptions.Timeout as err:
            error_message = "Connection to server '%s' timed out: %s"
            self.log.error(error_message, url, err, exc_info=True)
            raise pysolr.SolrError(error_message % (url, err))
        except requests.exceptions.ConnectionError as err:
            error_message = "Failed to connect to server at '%s', are you " \
                            "sure that URL is correct? Checking it in a " \
                            "browser might help: %s"
            params = (url, err)
            self.log.error(error_message, *params, exc_info=True)
            raise pysolr.SolrError(error_message % params)

        end_time = time.time()
        self.log.info("Finished '%s' (%s) with body '%s' in %0.3f seconds.",
                      url, method, log_body[:10], end_time - start_time)

        if int(resp.status_code) != 200:
            error_message = self._extract_error(resp)
            # Attach the raw response details to the log record.
            data = {'data': {'headers': resp.headers, 'response': resp.content}}
            self.log.error(error_message, extra=data)
            raise pysolr.SolrError(error_message)

        return pysolr.force_unicode(resp.content)
    def _to_python(self, value):
        """
        Converts values from Solr to native Python values.
        """
        if value is None:
            return value

        if isinstance(value, (int, float, complex)):
            return value

        is_list = isinstance(value, (list, tuple))

        values_processed = []
        values_to_process = []

        if isinstance(value, (list, tuple)):
            # Clone the value
            values_to_process = value[:]
        else:
            values_to_process.append(value)

        for value in values_to_process:

            if value == "true":
                values_processed.append(True)
                continue
            elif value == "false":
                values_processed.append(False)
                continue

            is_string = False

            if IS_PY3:
                if isinstance(value, bytes):
                    value = force_unicode(value)

                if isinstance(value, str):
                    is_string = True
            else:
                if isinstance(value, str):
                    value = force_unicode(value)

                if isinstance(value, string_types):
                    is_string = True

            if is_string:
                possible_datetime = DATETIME_REGEX.search(value)

                if possible_datetime:
                    date_values = possible_datetime.groupdict()

                    for dk, dv in date_values.items():
                        date_values[dk] = int(dv)

                    values_processed.append(
                        datetime.datetime(
                            date_values["year"],
                            date_values["month"],
                            date_values["day"],
                            date_values["hour"],
                            date_values["minute"],
                            date_values["second"],
                        )
                    )
                    continue
                # elif ObjectId.is_valid(value):
                #    values_processed.append(value)
                #    continue
                elif is_valid_uuid(value, version=4):
                    values_processed.append(value)
                    continue
                elif is_valid_uuid(value, version=3):
                    values_processed.append(value)
                    continue
                elif is_valid_uuid(value, version=2):
                    values_processed.append(value)
                    continue
                elif is_valid_uuid(value, version=1):
                    values_processed.append(value)
                    continue
            try:
                # This is slightly gross but it's hard to tell otherwise what
                # the string's original type might have been.
                values_processed.append(ast.literal_eval(value))
            except (ValueError, SyntaxError):
                # If it fails, continue on.
                pass

            values_processed.append(value)

        return values_processed if is_list else values_processed[0]
Exemple #23
0
 def test__build_doc(self):
     """Check the XML produced by _build_doc() for a simple document."""
     doc = {"id": "doc_1", "title": "Example doc ☃ 1", "price": 12.59, "popularity": 10}
     doc_xml = force_unicode(ET.tostring(self.solr._build_doc(doc), encoding="utf-8"))
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn('<field name="title">Example doc ☃ 1</field>', doc_xml)
     self.assertIn('<field name="id">doc_1</field>', doc_xml)
     self.assertEqual(len(doc_xml), 152)
Exemple #24
0
 def test_safe_urlencode(self):
     """Decode safe_urlencode() results and compare them with the expected text."""
     encoded_scalar = safe_urlencode({'test': 'Hello ☃! Helllo world!'})
     self.assertEqual(force_unicode(unquote_plus(encoded_scalar)), 'test=Hello ☃! Helllo world!')

     # List first, then tuple: both are expected to expand into repeated keys
     # when True is passed as the second argument.
     sequence_params = (
         {'test': ['Hello ☃!', 'Helllo world!']},
         {'test': ('Hello ☃!', 'Helllo world!')},
     )
     for params in sequence_params:
         decoded = force_unicode(unquote_plus(safe_urlencode(params, True)))
         self.assertEqual(decoded, "test=Hello \u2603!&test=Helllo world!")