Beispiel #1
0
    def translate_web_query_to_dc_query(self, data):
        '''
        Builds the DocumentCloud search string for a set of web search inputs.

        The DocumentCloud API utilizes the Apache Lucene syntax. The project
        restriction stored in ``self.dc_query`` (a 'field:value' string such
        as 'projectid:1542-city-of-new-orleans-contracts') is always added as
        a term so that the search stays inside our project.

        :param data: The query parameters. The keys 'search_input', \
        'vendor', 'department' and 'officer' are read.
        :type data: dict
        :returns: string. The query string ready for the DocumentCloud API.
        '''
        builder = QueryBuilder()
        builder.add_text(data['search_input'])

        # Split the 'field:value' project restriction into its two halves.
        project_parts = self.dc_query.split(':')
        builder.add_term(project_parts[0], project_parts[1])

        # Non-empty vendor/department values become upper-cased terms.
        for field in ('vendor', 'department'):
            selection = data[field]
            if selection == "":
                continue
            builder.add_term(field, selection.upper())

        # Without an officer there is nothing left to add.
        if len(data['officer']) == 0:
            return builder.get_query()

        officers = [data['officer']]
        log.debug('Officers: %s', officers)

        # An officer is searched for through the vendor it translates to.
        vendor = self.translate_officer_to_vendor(officers[0])
        builder.add_term("vendor", vendor.upper())

        return builder.get_query()
Beispiel #2
0
    def get_contracts(self, offset=0, limit=None):
        '''
        Query the database in reverse chronological order. Specify the number
        of recent contracts with offset and limit values.

        :param offset: The number of pages to skip. It is multiplied by \
        ``self.pagelength`` to get the row offset for the database query.
        :type offset: int
        :param limit: The number of records to return. Passed unchanged to \
        the query's ``limit()``.
        :type limit: int
        :returns: The matching contracts, after conversion by \
        ``translate_to_doc_cloud_form``.
        '''
        row_offset = offset * self.pagelength

        try:
            contracts = (SESSION.query(Contract)
                         .order_by(Contract.dateadded.desc())
                         .offset(row_offset)
                         .limit(limit)
                         .all())
        finally:
            # Always release the session, even when the query raises, so a
            # failed request does not leave it open.
            SESSION.close()

        contracts = self.translate_to_doc_cloud_form(contracts)

        log.debug('Contracts: %s', contracts)

        return contracts
Beispiel #3
0
    def __init__(self):
        '''
        Sets up the DocumentCloud client, the project filter used for every
        DocumentCloud search, and the number of results shown per page.
        '''
        self.document_cloud_client = DocumentCloud()
        self.dc_query = 'projectid:1542-city-of-new-orleans-contracts'
        self.pagelength = 10  # Same as the DocumentCloud API default of 10

        log.debug('%d documents', self.pagelength)
Beispiel #4
0
    def get_search_page(self, request):
        '''
        Gets the data necessary for the search page (/contracts/search/).

        :param request: The search parameters supplied by the user.
        :type request: dict
        :returns: tuple. Two dicts: one with the newly gathered data for the \
        page, and the other the parsed version of the incoming search \
        parameters.
        '''
        # Extract search parameters (text input and dropdown selections)
        data = self.parse_query_string(request)

        log.debug('User search parameters: %s', data)

        # Transform query parameters into string for DocumentCloud API.
        search_term = self.translate_web_query_to_dc_query(data)

        # Get a list of contracts by querying our project on DocCloud:
        documents = self.query_document_cloud(search_term,
                                              page=data['current_page'])

        # TODO: A second search?
        number_of_documents = self.find_number_of_documents(search_term)
        log.debug('%d documents', number_of_documents)
        log.debug('%d documents', len(documents))

        # Integer ceiling division: a partly filled last page still counts,
        # but an exact multiple of the page length must not add an empty
        # trailing page. Floor division also keeps the result an int under
        # Python 3. Always report at least one page, as before, so an empty
        # result set still renders as a single page.
        number_of_pages = max(
            1, -(-number_of_documents // self.pagelength))

        log.debug('Found {:,} documents across {:,} pages'.format(
            number_of_documents, number_of_pages))

        # Build "Mon. D, YYYY" from the struct_time fields instead of the
        # glibc-only "%-d" directive, which is not available on every
        # platform.
        now = time.localtime()
        updated_date = '{0}. {1}, {2}'.format(
            time.strftime('%b', now), now.tm_mday, now.tm_year)

        # Correct for AP Style: March through July are spelled out and
        # September is abbreviated "Sept."; the other months keep their
        # three-letter abbreviation.
        ap_style_months = (
            ('Mar.', 'March'),
            ('Apr.', 'April'),
            ('May.', 'May'),
            ('Jun.', 'June'),
            ('Jul.', 'July'),
            ('Sep.', 'Sept.'),
        )
        for abbreviated, ap_form in ap_style_months:
            updated_date = updated_date.replace(abbreviated, ap_form)

        output_data = {
            'current_page': data['current_page'],
            'departments': self.get_departments(),
            'documents': documents,
            'number_of_documents': number_of_documents,
            'number_of_pages': number_of_pages,
            'officers': self.get_officers(),
            'results_language': ResultsLanguage(data,
                                                number_of_documents).main(),
            'search_input': data['search_input'],
            'updated_date': updated_date,
            'vendors': self.get_vendors()
        }

        return output_data, data
Beispiel #5
0
    def query_document_cloud(self, search_term, page=1):
        '''
        Runs a search against the DocumentCloud API.
        This is its own method so that queries can be cached via @memoize to
        speed things up.

        :param search_term: The query term to run against DocumentCloud API.
        :type search_term: string
        :param page: The page of results to fetch. Useful for pagination. \
        Default: 1.
        :type page: int
        :returns: The search output from the DocumentCloud client for the \
        requested page, with ``self.pagelength`` results per page.
        '''
        log.debug('DocumentCloud search: %s', search_term)

        per_page = self.pagelength
        log.debug('Showing %d results per page, page %d', per_page, page)

        search = self.document_cloud_client.documents.search
        output = search(search_term, page=page, per_page=per_page)

        log.debug('Found documents: %s', output)

        return output