Exemple #1
0
    def test_compare_equal_documents(self):
        """Documents built from the same JSON are equal whatever their session."""
        # The `session` object only holds authentication information, so it
        # must not influence equality between `Document` objects.
        without_session = Document(session=None, **self.example_json)
        with_session = Document(session=self.session, **self.example_json)

        self.assertEqual(without_session, with_session)
Exemple #2
0
    def test_getting_specific_property_returns_an_error(self, mocked_get):
        """`get_property` raises RuntimeError when the mocked status is 403."""
        bad_session = requests.Session()
        bad_session.auth = ('wrong_user', 'my_precious')
        mocked_get.return_value.status_code = 403

        doc = Document(session=bad_session, **self.example_json)

        # Callable form of assertRaises: equivalent to the `with` form.
        self.assertRaises(RuntimeError, doc.get_property, 'text')
Exemple #3
0
    def test_compare_documents_with_different_corpora(self):
        """Documents that differ only in `corpus` must not compare equal."""
        original = Document(session=None, **self.example_json)

        # Same payload, except that it points at another corpus.
        altered_json = dict(self.example_json)
        altered_json.update({'corpus': "http://pypln.example.com/corpora/2/"})
        altered = Document(session=None, **altered_json)

        self.assertNotEqual(original, altered)
Exemple #4
0
    def test_compare_documents_with_different_owners(self):
        """Documents that differ only in `owner` must not compare equal."""
        original = Document(session=None, **self.example_json)

        # Same payload, except that it belongs to another user.
        altered_json = dict(self.example_json)
        altered_json.update({'owner': "user_2"})
        altered = Document(session=None, **altered_json)

        self.assertNotEqual(original, altered)
Exemple #5
0
    def test_compare_documents_with_different_upload_dates(self):
        """Documents that differ only in `uploaded_at` must not compare equal."""
        original = Document(session=None, **self.example_json)

        # Same payload, except for the upload timestamp.
        altered_json = dict(self.example_json)
        altered_json.update({'uploaded_at': '2013-10-29T17:00:00.000Z'})
        altered = Document(session=None, **altered_json)

        self.assertNotEqual(original, altered)
Exemple #6
0
    def test_compare_documents_with_different_sizes(self):
        """Documents that differ only in `size` must not compare equal."""
        original = Document(session=None, **self.example_json)

        # Same payload, except for the reported size.
        altered_json = dict(self.example_json)
        altered_json.update({'size': 1})
        altered = Document(session=None, **altered_json)

        self.assertNotEqual(original, altered)
Exemple #7
0
    def test_compare_documents_with_different_urls(self):
        """Documents that differ only in `url` must not compare equal."""
        original = Document(session=None, **self.example_json)

        # Same payload, except that it is served from another URL.
        altered_json = dict(self.example_json)
        altered_json.update({'url': 'http://pypln.example2.com/documents/1/'})
        altered = Document(session=None, **altered_json)

        self.assertNotEqual(original, altered)
Exemple #8
0
    def test_getting_specific_property_returns_an_error(self, mocked_get):
        """`get_property` raises RuntimeError when the mocked status is 403."""
        bad_session = requests.Session()
        bad_session.auth = ('wrong_user', 'my_precious')
        mocked_get.return_value.status_code = 403

        doc = Document(session=bad_session, **self.example_json)

        # Callable form of assertRaises: equivalent to the `with` form.
        self.assertRaises(RuntimeError, doc.get_property, 'text')
Exemple #9
0
    def test_get_specific_property(self, mocked_get):
        """`get_property` returns the `value` entry of the mocked JSON reply."""
        expected_text = "This is a test file with some test text."

        response = mocked_get.return_value
        response.status_code = 200
        response.json.return_value = {'value': expected_text}

        doc = Document(session=self.session, **self.example_json)
        fetched = doc.get_property('text')

        self.assertEqual(fetched, expected_text)
        # The property is requested at "<properties url>" + "<property name>".
        mocked_get.assert_called_with(self.example_json['properties'] + 'text')
Exemple #10
0
    def test_get_specific_property(self, mocked_get):
        """`get_property` returns the `value` entry of the mocked JSON reply."""
        expected_text = "This is a test file with some test text."

        response = mocked_get.return_value
        response.status_code = 200
        response.json.return_value = {'value': expected_text}

        doc = Document(session=self.session, **self.example_json)
        fetched = doc.get_property('text')

        self.assertEqual(fetched, expected_text)
        # The property is requested at "<properties url>" + "<property name>".
        mocked_get.assert_called_with(self.example_json['properties'] + 'text')
Exemple #11
0
    def test_instantiating_document_from_url_fails(self, mocked_get):
        """`Document.from_url` raises RuntimeError when the mocked status is 403."""
        response = mocked_get.return_value
        response.status_code = 403
        response.json.return_value = self.example_json

        bad_credentials = ('wrong_user', 'my_precious')

        # Callable form of assertRaises: equivalent to the `with` form.
        self.assertRaises(RuntimeError, Document.from_url,
                          self.example_json['url'], bad_credentials)
Exemple #12
0
    def test_instantiating_document_from_url_fails(self, mocked_get):
        """`Document.from_url` raises RuntimeError when the mocked status is 403."""
        response = mocked_get.return_value
        response.status_code = 403
        response.json.return_value = self.example_json

        bad_credentials = ('wrong_user', 'my_precious')

        # Callable form of assertRaises: equivalent to the `with` form.
        self.assertRaises(RuntimeError, Document.from_url,
                          self.example_json['url'], bad_credentials)
Exemple #13
0
    def test_instantiate_document_from_json(self):
        """Each JSON field except `properties` is exposed as an attribute."""
        doc = Document(session=self.session, **self.example_json)

        for field, expected in self.example_json.items():
            if field == "properties":
                # Checked separately below through `properties_url`.
                continue
            self.assertEqual(getattr(doc, field), expected)

        # The very same session object must be kept, not a copy.
        self.assertIs(doc.session, self.session)
        self.assertEqual(doc.properties_url, self.example_json['properties'])
Exemple #14
0
def main():
    """Collect finished PyPLN analyses for every entry queued in `pypln_temp`.

    Keeps re-reading `pypln_temp` until it is empty. For each queued article
    the PyPLN document is fetched from `article['pypln_url']` and then:

    * if fetching raises RuntimeError, the error is logged and the entry is
      skipped (it stays in the queue);
    * if the document has an `_exception` property, the article is marked
      with status 2 and the entry is removed from the queue;
    * if the document has fewer than 29 properties, the analysis is treated
      as unfinished: the first time this is seen a `time` stamp is stored on
      the queue entry; once more than five minutes have elapsed since that
      stamp the article is marked with status 2 and the entry is removed;
    * otherwise every property is copied into `articles_analysis`, the
      article is marked with status 1 and the entry is removed.
    """
    # Below this number of properties the analysis is treated as unfinished.
    # NOTE(review): presumably the size of a complete PyPLN property set --
    # confirm against the PyPLN pipeline.
    expected_properties = 29
    timeout_minutes = 5

    cursor = pypln_temp.find()

    # NOTE(review): entries whose document keeps failing to load are never
    # removed from `pypln_temp`, so this loop does not terminate for them.
    while pypln_temp.count() > 0:
        for article in cursor:
            url = article['pypln_url']
            try:
                my_doc = Document.from_url(url, settings.PYPLN_CREDENTIALS)
            except RuntimeError as e:
                logger.error(
                    "The document {} could not be found on the PyPLN collection: {}"
                    .format(url, e))
                continue
            _id = article['articles_id']
            _id_temp = article['_id']

            if '_exception' in my_doc.properties:
                logger.warning("PyPLN found an error {}".format(url))
                articles.update({'_id': _id}, {'$set': {'status': 2}})
                pypln_temp.remove({'_id': _id_temp})
                continue

            if len(my_doc.properties) < expected_properties:
                if 'time' not in article:
                    # First time the document is seen unfinished: start the
                    # timeout clock for it.
                    pypln_temp.update(
                        {'_id': _id_temp},
                        {'$set': {'time': datetime.datetime.now()}})
                    continue

                # `timedelta.seconds` only holds the sub-day remainder, so an
                # entry older than a day could look fresh again; use the full
                # elapsed time instead.
                elapsed = datetime.datetime.now() - article['time']
                if elapsed.total_seconds() / 60 > timeout_minutes:
                    logger.warning(
                        "PyPLN could not finish the analysis {}".format(url))
                    articles.update({'_id': _id}, {'$set': {'status': 2}})
                    pypln_temp.remove({'_id': _id_temp})
                continue

            # Analysis complete: persist every property alongside the
            # article id it belongs to.
            analysis = {'articles_id': _id}
            for prop_name in my_doc.properties:
                analysis[prop_name] = my_doc.get_property(prop_name)

            articles_analysis.insert(analysis)
            articles.update({'_id': _id}, {'$set': {'status': 1}})
            pypln_temp.remove({'_id': _id_temp})
        cursor = pypln_temp.find()
    cursor.close()
Exemple #15
0
    def test_download_wordcloud(self, mocked_get):
        """`download_wordcloud` writes the base64-decoded payload to the file.

        The mocked response carries a base64-encoded fake PNG as `value`;
        `open` is patched so nothing is written to disk. The test checks the
        file is opened as ('test.png', 'w'), that the decoded content is
        written once, and that the `wordcloud` property URL was requested.
        """
        png = "This is not really a png.\n".encode('ascii')
        encoded_png = base64.b64encode(png)

        mocked_get.return_value.status_code = 200
        mocked_get.return_value.json.return_value = {'value': encoded_png}

        document = Document(session=self.session, **self.example_json)

        # `open` lives in `__builtin__` on Python 2 and `builtins` on
        # Python 3. Compare the numeric major version: comparing the
        # `sys.version` string is lexicographic and misorders multi-digit
        # major versions (e.g. '10.0' < '3' is True).
        import sys
        if sys.version_info[0] < 3:
            builtins_module = '__builtin__'
        else:
            builtins_module = 'builtins'

        m = mock_open()
        with patch('{}.open'.format(builtins_module), m, create=True):
            document.download_wordcloud('test.png')

        m.assert_called_once_with('test.png', 'w')
        # Calling the mock again returns the same file handle mock that the
        # code under test received.
        handle = m()
        handle.write.assert_called_once_with(png.decode('ascii'))
        mocked_get.assert_called_with(self.example_json['properties'] +
                                      'wordcloud')
Exemple #16
0
    def test_download_wordcloud(self, mocked_get):
        """`download_wordcloud` writes the base64-decoded payload to the file.

        The mocked response carries a base64-encoded fake PNG as `value`;
        `open` is patched so nothing is written to disk. The test checks the
        file is opened as ('test.png', 'w'), that the decoded content is
        written once, and that the `wordcloud` property URL was requested.
        """
        png = "This is not really a png.\n".encode('ascii')
        encoded_png = base64.b64encode(png)

        mocked_get.return_value.status_code = 200
        mocked_get.return_value.json.return_value = {'value': encoded_png}

        document = Document(session=self.session, **self.example_json)

        # `open` lives in `__builtin__` on Python 2 and `builtins` on
        # Python 3. Compare the numeric major version: comparing the
        # `sys.version` string is lexicographic and misorders multi-digit
        # major versions (e.g. '10.0' < '3' is True).
        import sys
        if sys.version_info[0] < 3:
            builtins_module = '__builtin__'
        else:
            builtins_module = 'builtins'

        m = mock_open()
        with patch('{}.open'.format(builtins_module), m, create=True):
            document.download_wordcloud('test.png')

        m.assert_called_once_with('test.png', 'w')
        # Calling the mock again returns the same file handle mock that the
        # code under test received.
        handle = m()
        handle.write.assert_called_once_with(png.decode('ascii'))
        mocked_get.assert_called_with(self.example_json['properties'] +
                                      'wordcloud')
Exemple #17
0
    def test_instantiate_document_from_url(self, mocked_get):
        """`Document.from_url` builds a Document from the JSON served at a URL."""
        response = mocked_get.return_value
        response.status_code = 200
        response.json.return_value = self.example_json

        document_url = self.example_json['url']
        doc = Document.from_url(document_url, self.auth)

        mocked_get.assert_called_with(document_url)
        self.assertIsInstance(doc, Document)

        for field, expected in self.example_json.items():
            if field == "properties":
                # Checked separately below through `properties_url`.
                continue
            self.assertEqual(getattr(doc, field), expected)
        self.assertEqual(doc.properties_url, self.example_json['properties'])

        # The credentials passed in must end up on the document's session.
        self.assertEqual(doc.session.auth, self.auth)
Exemple #18
0
    def test_instantiate_document_from_url(self, mocked_get):
        """`Document.from_url` builds a Document from the JSON served at a URL."""
        response = mocked_get.return_value
        response.status_code = 200
        response.json.return_value = self.example_json

        document_url = self.example_json['url']
        doc = Document.from_url(document_url, self.auth)

        mocked_get.assert_called_with(document_url)
        self.assertIsInstance(doc, Document)

        for field, expected in self.example_json.items():
            if field == "properties":
                # Checked separately below through `properties_url`.
                continue
            self.assertEqual(getattr(doc, field), expected)
        self.assertEqual(doc.properties_url, self.example_json['properties'])

        # The credentials passed in must end up on the document's session.
        self.assertEqual(doc.session.auth, self.auth)
def main():
    """Collect finished PyPLN analyses for every entry queued in `pypln_temp`.

    Keeps re-reading `pypln_temp` until it is empty. For each queued article
    the PyPLN document is fetched from `article['pypln_url']` and then:

    * if fetching raises RuntimeError, the error is logged and the entry is
      skipped (it stays in the queue);
    * if the document has an `_exception` property, the article is marked
      with status 2 and the entry is removed from the queue;
    * if the document has fewer than 29 properties, the analysis is treated
      as unfinished: the first time this is seen a `time` stamp is stored on
      the queue entry; once more than five minutes have elapsed since that
      stamp the article is marked with status 2 and the entry is removed;
    * otherwise every property is copied into `articles_analysis`, the
      article is marked with status 1 and the entry is removed.
    """
    # Below this number of properties the analysis is treated as unfinished.
    # NOTE(review): presumably the size of a complete PyPLN property set --
    # confirm against the PyPLN pipeline.
    expected_properties = 29
    timeout_minutes = 5

    cursor = pypln_temp.find()

    # NOTE(review): entries whose document keeps failing to load are never
    # removed from `pypln_temp`, so this loop does not terminate for them.
    while pypln_temp.count() > 0:
        for article in cursor:
            url = article['pypln_url']
            try:
                my_doc = Document.from_url(url, settings.PYPLN_CREDENTIALS)
            except RuntimeError as e:
                logger.error("The document {} could not be found on the PyPLN collection: {}".format(url, e))
                continue
            _id = article['articles_id']
            _id_temp = article['_id']

            if '_exception' in my_doc.properties:
                logger.warning("PyPLN found an error {}".format(url))
                articles.update({'_id': _id}, {'$set': {'status': 2}})
                pypln_temp.remove({'_id': _id_temp})
                continue

            if len(my_doc.properties) < expected_properties:
                if 'time' not in article:
                    # First time the document is seen unfinished: start the
                    # timeout clock for it.
                    pypln_temp.update({'_id': _id_temp}, {'$set': {'time': datetime.datetime.now()}})
                    continue

                # `timedelta.seconds` only holds the sub-day remainder, so an
                # entry older than a day could look fresh again; use the full
                # elapsed time instead.
                elapsed = datetime.datetime.now() - article['time']
                if elapsed.total_seconds() / 60 > timeout_minutes:
                    logger.warning("PyPLN could not finish the analysis {}".format(url))
                    articles.update({'_id': _id}, {'$set': {'status': 2}})
                    pypln_temp.remove({'_id': _id_temp})
                continue

            # Analysis complete: persist every property alongside the
            # article id it belongs to.
            analysis = {'articles_id': _id}
            for prop_name in my_doc.properties:
                analysis[prop_name] = my_doc.get_property(prop_name)

            articles_analysis.insert(analysis)
            articles.update({'_id': _id}, {'$set': {'status': 1}})
            pypln_temp.remove({'_id': _id_temp})
        cursor = pypln_temp.find()
    cursor.close()
Exemple #20
0
    def test_properties_is_a_list_of_properties(self, mocked_get):
        """`document.properties` yields property names, not the resource URL.

        The mocked response lists one URL per property (properties URL +
        name + '/'); the document is expected to expose just the names.
        """
        property_names = [
            "mimetype", "freqdist", "average_sentence_repertoire", "language",
            "average_sentence_length", "sentences", "momentum_1", "pos",
            "momentum_3", "file_metadata", "tokens", "repertoire", "text",
            "tagset", "momentum_4", "momentum_2"
        ]

        base_url = self.example_json['properties']
        property_urls = []
        for name in property_names:
            property_urls.append(base_url + name + '/')

        response = mocked_get.return_value
        response.status_code = 200
        response.json.return_value = {'properties': property_urls}

        doc = Document(session=self.session, **self.example_json)

        self.assertEqual(doc.properties, property_names)
        mocked_get.assert_called_with(base_url)