Beispiel #1
0
    def test_shouldExtractSummary(self):
        """Check that ``Document.summary()`` delegates to the summarizer.

        The summarizer is a mock stubbed to return two fixed sentences when
        called with the expected ``ProcessedDocument``. The test verifies that
        the stubbed method was invoked with that processed document and that
        the result equals a ``DocumentSummary`` built from the document id and
        the stubbed sentences.
        """
        summarizer = mock()
        # Implicit literal concatenation; the trailing spaces are significant
        # because they separate the sentences of the document text.
        document_text = (
            "This is one summary sentence. This is what you are trying to summarize. "
            "In this test it does not matter what is the kind of text you supply, this is another summary sentence. "
            "That is because it uses a mock and is trying to test the behavior of api. The API sanity check for "
            "summary mechanism itself is showcased in summary test"
        )
        document = Document(doc_id="123",
                            text=document_text,
                            summarizer=summarizer)

        # Expected intermediate representation handed to the summarizer.
        sentence_map = {
            0: 'This is one summary sentence.',
            1: 'This is what you are trying to summarize.',
            2: 'In this test it does not matter what is the kind of text you supply, this is another summary sentence.',
            3: 'That is because it uses a mock and is trying to test the behavior of api.',
            4: 'The API sanity check for summary mechanism itself is showcased in summary test',
        }
        tokenised_sentence_map = {
            0: ['summary', 'sentence'],
            1: ['summarize'],
            2: [
                'test', 'matter', 'kind', 'text', 'supply', 'summary',
                'sentence',
            ],
            3: ['mock', 'test', 'behavior', 'api'],
            4: [
                'API', 'sanity', 'check', 'summary', 'mechanism', 'showcased',
                'summary', 'test',
            ],
        }
        tokens = [
            'summary', 'sentence', 'summarize', 'test', 'matter', 'kind',
            'text', 'supply', 'summary', 'sentence', 'mock', 'test',
            'behavior', 'api', 'API', 'sanity', 'check', 'summary',
            'mechanism', 'showcased', 'summary', 'test',
        ]
        processed_document = ProcessedDocument(
            sentence_map=sentence_map,
            tokenised_sentence_map=tokenised_sentence_map,
            tokens=tokens)

        summary_sentences = [
            "This is one summary sentence", "this is another summary sentence"
        ]
        expected_summary = DocumentSummary("123", summary_sentences)
        when(summarizer).summarize_using_weighing_measures(
            processed_document).thenReturn(summary_sentences)

        document_summary = document.summary()

        verify(summarizer).summarize_using_weighing_measures(
            processed_document)
        # assertEquals is a deprecated alias removed in Python 3.12.
        self.assertEqual(expected_summary, document_summary)
Beispiel #2
0
    def put(self):
        """Summarise the document in the request body and notify the callback.

        The request body is parsed as JSON and must contain the keys
        ``callback``, ``documentId``, ``extractedText`` and
        ``compressionRatio`` (converted with ``int``). A ``Document`` is built
        with a ``Summarizer`` configured with that compression ratio.

        On success the summary's JSON is POSTed to the callback URL. If
        summary generation raises, a JSON payload with ``status`` set to
        ``"failure"``, the error message and the document id is POSTed
        instead.

        Errors while parsing the body or reading the required keys are not
        handled here and propagate to the caller.
        """
        request_body = self.request.body
        logger.debug("Request received for summarisation; Request body: %s",
                     request_body[0:20])
        parameters = json.loads(request_body)

        callback_url = parameters["callback"]
        document_id = parameters["documentId"]
        extracted_text = parameters["extractedText"]
        compression_ratio = int(parameters["compressionRatio"])
        summarizer = Summarizer(compression_ratio=compression_ratio)
        document = Document(doc_id=document_id,
                            text=extracted_text,
                            summarizer=summarizer)
        json_headers = {'Content-Type': 'application/json'}

        try:
            logger.info("Generating Summary for document %s", document_id)
            document_summary = document.summary()
        except Exception as e:
            # logger.exception records the traceback; the previous call passed
            # the exception as a stray format argument, which made the logging
            # module report a formatting error instead of the message.
            logger.exception(
                "Error while generating summary for document %s", document_id)
            # BaseException.message does not exist on Python 3; fall back to
            # str(e) so the failure callback is still delivered.
            error_message = getattr(e, "message", None) or str(e)
            requests.post(callback_url,
                          data=json.dumps({
                              "status": "failure",
                              "message": error_message,
                              "documentId": document_id
                          }),
                          headers=json_headers)
        else:
            # Serialise once and reuse for both the log sample and the payload.
            summary_json = document_summary.to_json()
            logger.info(
                "Summarisation completed for document %s. Updating to callback %s and sample summary %s",
                document_id, callback_url, summary_json[0:20])
            requests.post(callback_url,
                          data=summary_json,
                          headers=json_headers)