コード例 #1
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def _get_city_vendor_id(html):
        '''
        Parses the contract page's HTML to find the vendor ID.

        :param html: The contract page's HTML.
        :type html: string
        :returns: string. The vendor ID, or an empty string if none is found.
        '''

        pattern = r"(?<=ExternalVendorProfile\(')\d+"
        vendor_ids = re.findall(pattern, html)

        if len(vendor_ids) == 0:
            log.error('No vendor ID found')
            vendor_id = ""
        else:
            # You need to take the first one for this list or you'll sometimes
            # end up w/ the vendor_id for a subcontractor, which will sometimes
            # end up on the vendor page.
            # http://www.purchasing.cityofno.com/bso/external/purchaseorder/
            # poSummary.sdo?docId=FC154683&releaseNbr=0&parentUrl=contract
            vendor_id = vendor_ids[0]
            log.debug('Vendor ID %s', vendor_id)

        return vendor_id
コード例 #2
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def _get_knumber(soup):
        '''
        Find the k number in the contract page HTML, under "Alternate ID."

        :param soup: A BeautifulSoup object for the contract page HTML.
        :type soup: BeautifulSoup object.
        :returns: string. The contract's K number.
        '''

        main_table = soup.select('.table-01').pop()

        metadata_row = (main_table.findChildren(['tr'])[2].findChildren(
            ['td'])[0].findChildren(['table'])[0].findChildren(['tr']))

        try:
            knumber = (metadata_row[6].findChildren(['td'])[1].contents.pop())

            # Remove extra characters:
            knumber = (knumber.replace('k', '').replace('K', '').replace(
                'm', '').replace('M', '').strip())
        except Exception as e:
            log.error(e, exc_info=True)
            knumber = "unknown"

        if len(knumber) == 0:  # Empty string
            knumber = "unknown"

        return knumber
コード例 #3
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def __init__(self, purchase_order_number):
        '''
        Collect the details of one purchase order.

        If the purchase order number fails the format check, only
        ``self.purchaseorder`` is set and the remaining setup is skipped.

        :param purchase_order_number: The purchase order number to load.
        '''
        self.purchaseorder = purchase_order_number

        is_valid = Utilities().check_if_valid_purchase_order_format(
            self.purchaseorder)
        if is_valid is False:
            log.debug('Purchase order %s is invalid', self.purchaseorder)
            return

        # The vendor ID is read from the raw page source, and the vendor
        # profile is downloaded before the vendor name lookup further down.
        page_html = self._get_html()
        self.vendor_id_city = self._get_city_vendor_id(page_html)
        self._download_vendor_profile(self.vendor_id_city)

        page_soup = BeautifulSoup(page_html)
        self.description = self._get_description(page_soup)

        try:
            vendor_name = self._get_vendor_name()
        except IOError as error:
            # Fall back to a placeholder name when the lookup hits an
            # IOError.
            log.error(error, exc_info=True)
            vendor_name = "unknown"
            log.info('No vendor info for purchase order %s',
                     self.purchaseorder)
        self.vendor_name = vendor_name

        self.department = self._get_department(page_soup)
        self.k_number = self._get_knumber(page_soup)
        self.attachments = self._get_attachments(page_soup)
        self.data = self._get_data()
        self.title = "%s : %s" % (self.vendor_name, self.description)
コード例 #4
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def _get_city_vendor_id(html):
        '''
        Parses the contract page's HTML to find the vendor ID.

        :param html: The contract page's HTML.
        :type html: string
        :returns: string. The vendor ID, or an empty string if none is found.
        '''

        pattern = r"(?<=ExternalVendorProfile\(')\d+"
        vendor_ids = re.findall(pattern, html)

        if len(vendor_ids) == 0:
            log.error('No vendor ID found')
            vendor_id = ""
        else:
            # You need to take the first one for this list or you'll sometimes
            # end up w/ the vendor_id for a subcontractor, which will sometimes
            # end up on the vendor page.
            # http://www.purchasing.cityofno.com/bso/external/purchaseorder/
            # poSummary.sdo?docId=FC154683&releaseNbr=0&parentUrl=contract
            vendor_id = vendor_ids[0]
            log.debug('Vendor ID %s', vendor_id)

        return vendor_id
コード例 #5
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def __init__(self, purchase_order_number):
        '''
        Build the record for a single purchase order.

        When the purchase order number does not pass the format check, the
        constructor stops after storing ``self.purchaseorder``; none of the
        other attributes are set.

        :param purchase_order_number: The purchase order number to load.
        '''
        self.purchaseorder = purchase_order_number

        format_ok = Utilities().check_if_valid_purchase_order_format(
            self.purchaseorder)
        if format_ok is False:
            log.debug('Purchase order %s is invalid', self.purchaseorder)
            return

        # Raw page source first: the vendor ID is parsed from it and the
        # vendor profile is downloaded before anything else is read.
        raw_html = self._get_html()
        self.vendor_id_city = self._get_city_vendor_id(raw_html)
        self._download_vendor_profile(self.vendor_id_city)

        parsed_page = BeautifulSoup(raw_html)
        self.description = self._get_description(parsed_page)

        try:
            resolved_name = self._get_vendor_name()
        except IOError as io_error:
            # An IOError from the name lookup is not fatal; use a
            # placeholder.
            log.error(io_error, exc_info=True)
            resolved_name = "unknown"
            log.info('No vendor info for purchase order %s',
                     self.purchaseorder)
        self.vendor_name = resolved_name

        self.department = self._get_department(parsed_page)
        self.k_number = self._get_knumber(parsed_page)
        self.attachments = self._get_attachments(parsed_page)
        self.data = self._get_data()
        self.title = "%s : %s" % (self.vendor_name, self.description)
コード例 #6
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def _get_knumber(soup):
        '''
        Find the k number in the contract page HTML, under "Alternate ID."

        :param soup: A BeautifulSoup object for the contract page HTML.
        :type soup: BeautifulSoup object.
        :returns: string. The contract's K number.
        '''

        main_table = soup.select('.table-01').pop()

        metadata_row = (main_table
                        .findChildren(['tr'])[2]
                        .findChildren(['td'])[0]
                        .findChildren(['table'])[0]
                        .findChildren(['tr']))

        try:
            knumber = (metadata_row[6]
                       .findChildren(['td'])[1]
                       .contents.pop())

            # Remove extra characters:
            knumber = (knumber.replace('k', '').replace('K', '')
                       .replace('m', '').replace('M', '').strip())
        except Exception as e:
            log.error(e, exc_info=True)
            knumber = "unknown"

        if len(knumber) == 0:  # Empty string
            knumber = "unknown"

        return knumber
コード例 #7
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def _get_description(soup):
        '''
        Find the description in the HTML.

        :param soup: A BeautifulSoup object for the contract page HTML.
        :type soup: BeautifulSoup object.
        :returns: string. The contract description on the city purchasing site.
        '''

        try:
            main_table = soup.select('.table-01').pop()
            metadata_row = (main_table
                            .findChildren(['tr'])[2]
                            .findChildren(['td'])[0]
                            .findChildren(['table'])[0]
                            .findChildren(['tr']))

            description = (metadata_row[1]
                           .findChildren(['td'])[5]
                           .contents.pop().strip())

            return str(description)
        except Exception as e:
            log.error(e, exc_info=True)
            return ""
コード例 #8
0
    def _get_metadata(document, field):
        '''Fetch this metadata from our DocumentCloud project.'''

        try:
            output = document.data[field]
            if len(output) == 0:
                output = "unknown"
        except Exception as e:
            log.error(e, exc_info=True)
            output = "unknown"

        return output
コード例 #9
0
    def _get_metadata(document, field):
        '''Fetch this metadata from our DocumentCloud project.'''

        try:
            output = document.data[field]
            if len(output) == 0:
                output = "unknown"
        except Exception as e:
            log.error(e, exc_info=True)
            output = "unknown"

        return output
コード例 #10
0
    def match_local_database_to_document_cloud(self):
        '''
        Sync our local database with our DocumentCloud project.

        Every half-filled contract reported by the local database is fetched
        from DocumentCloud by its ``doc_cloud_id`` and passed to
        ``_match_contract``. A failure on one contract is logged and the loop
        moves on to the next.

        TODO: Why fetching half-filled contracts?
        '''

        pending = LensDatabase().get_half_filled_contracts()
        log.info('%d half-filled contracts need to be synced',
                 len(pending))

        for record in pending:
            try:
                cloud_document = self.client.documents.get(
                    record.doc_cloud_id)
                self._match_contract(cloud_document)
            except Exception as error:
                # Best effort: one bad contract must not stop the sync.
                log.error(error, exc_info=True)
コード例 #11
0
    def match_local_database_to_document_cloud(self):
        '''
        Bring our local database in line with our DocumentCloud project.

        The local database is asked for its half-filled contracts. Each one
        is looked up on DocumentCloud through ``self.client`` using its
        ``doc_cloud_id`` and handed to ``_match_contract``. Errors are logged
        per contract and do not interrupt the remaining ones.

        TODO: Why fetching half-filled contracts?
        '''

        to_sync = LensDatabase().get_half_filled_contracts()
        log.info('%d half-filled contracts need to be synced', len(to_sync))

        for local_contract in to_sync:
            try:
                doc_cloud_id = local_contract.doc_cloud_id
                remote_contract = self.client.documents.get(doc_cloud_id)
                self._match_contract(remote_contract)
            except Exception as err:
                # Log and continue so a single failure doesn't end the run.
                log.error(err, exc_info=True)
コード例 #12
0
ファイル: purchase_order.py プロジェクト: TheLens/contracts
    def _get_description(soup):
        '''
        Find the description in the HTML.

        :param soup: A BeautifulSoup object for the contract page HTML.
        :type soup: BeautifulSoup object.
        :returns: string. The contract description on the city purchasing site.
        '''

        try:
            main_table = soup.select('.table-01').pop()
            metadata_row = (main_table.findChildren(['tr'])[2].findChildren(
                ['td'])[0].findChildren(['table'])[0].findChildren(['tr']))

            description = (metadata_row[1].findChildren(
                ['td'])[5].contents.pop().strip())

            return str(description)
        except Exception as e:
            log.error(e, exc_info=True)
            return ""