def _get_city_vendor_id(html): ''' Parses the contract page's HTML to find the vendor ID. :param html: The contract page's HTML. :type html: string :returns: string. The vendor ID, or an empty string if none is found. ''' pattern = r"(?<=ExternalVendorProfile\(')\d+" vendor_ids = re.findall(pattern, html) if len(vendor_ids) == 0: log.error('No vendor ID found') vendor_id = "" else: # You need to take the first one for this list or you'll sometimes # end up w/ the vendor_id for a subcontractor, which will sometimes # end up on the vendor page. # http://www.purchasing.cityofno.com/bso/external/purchaseorder/ # poSummary.sdo?docId=FC154683&releaseNbr=0&parentUrl=contract vendor_id = vendor_ids[0] log.debug('Vendor ID %s', vendor_id) return vendor_id
def _get_knumber(soup): ''' Find the k number in the contract page HTML, under "Alternate ID." :param soup: A BeautifulSoup object for the contract page HTML. :type soup: BeautifulSoup object. :returns: string. The contract's K number. ''' main_table = soup.select('.table-01').pop() metadata_row = (main_table.findChildren(['tr'])[2].findChildren( ['td'])[0].findChildren(['table'])[0].findChildren(['tr'])) try: knumber = (metadata_row[6].findChildren(['td'])[1].contents.pop()) # Remove extra characters: knumber = (knumber.replace('k', '').replace('K', '').replace( 'm', '').replace('M', '').strip()) except Exception as e: log.error(e, exc_info=True) knumber = "unknown" if len(knumber) == 0: # Empty string knumber = "unknown" return knumber
def __init__(self, purchase_order_number):
    '''
    Gather the details of one purchase order from its contract page.

    If the purchase order number fails the format check, this is logged
    and initialization stops early; only ``self.purchaseorder`` is set in
    that case.

    :param purchase_order_number: The purchase order number to look up.
    '''
    self.purchaseorder = purchase_order_number

    # Bail out before fetching anything when the number is malformed.
    format_check = Utilities().check_if_valid_purchase_order_format(
        self.purchaseorder)
    if format_check is False:
        log.debug('Purchase order %s is invalid', self.purchaseorder)
        return

    # Vendor ID comes from the raw page markup; the vendor profile is
    # downloaded before the vendor name is looked up below.
    page_html = self._get_html()
    self.vendor_id_city = self._get_city_vendor_id(page_html)
    self._download_vendor_profile(self.vendor_id_city)

    page_soup = BeautifulSoup(page_html)
    self.description = self._get_description(page_soup)

    try:
        self.vendor_name = self._get_vendor_name()
    except IOError as error:
        # The vendor lookup failed with an I/O error; carry on with a
        # placeholder name.
        log.error(error, exc_info=True)
        self.vendor_name = "unknown"
        log.info('No vendor info for purchase order %s', self.purchaseorder)

    # Remaining fields parsed from the contract page.
    self.department = self._get_department(page_soup)
    self.k_number = self._get_knumber(page_soup)
    self.attachments = self._get_attachments(page_soup)

    self.data = self._get_data()
    self.title = "%s : %s" % (self.vendor_name, self.description)
def _get_knumber(soup): ''' Find the k number in the contract page HTML, under "Alternate ID." :param soup: A BeautifulSoup object for the contract page HTML. :type soup: BeautifulSoup object. :returns: string. The contract's K number. ''' main_table = soup.select('.table-01').pop() metadata_row = (main_table .findChildren(['tr'])[2] .findChildren(['td'])[0] .findChildren(['table'])[0] .findChildren(['tr'])) try: knumber = (metadata_row[6] .findChildren(['td'])[1] .contents.pop()) # Remove extra characters: knumber = (knumber.replace('k', '').replace('K', '') .replace('m', '').replace('M', '').strip()) except Exception as e: log.error(e, exc_info=True) knumber = "unknown" if len(knumber) == 0: # Empty string knumber = "unknown" return knumber
def _get_description(soup): ''' Find the description in the HTML. :param soup: A BeautifulSoup object for the contract page HTML. :type soup: BeautifulSoup object. :returns: string. The contract description on the city purchasing site. ''' try: main_table = soup.select('.table-01').pop() metadata_row = (main_table .findChildren(['tr'])[2] .findChildren(['td'])[0] .findChildren(['table'])[0] .findChildren(['tr'])) description = (metadata_row[1] .findChildren(['td'])[5] .contents.pop().strip()) return str(description) except Exception as e: log.error(e, exc_info=True) return ""
def _get_metadata(document, field): '''Fetch this metadata from our DocumentCloud project.''' try: output = document.data[field] if len(output) == 0: output = "unknown" except Exception as e: log.error(e, exc_info=True) output = "unknown" return output
def match_local_database_to_document_cloud(self):
    '''
    Match our local database to our DocumentCloud project.

    Asks the local database for its half-filled contracts, fetches each one
    from DocumentCloud by its ``doc_cloud_id`` and hands the fetched
    document to ``self._match_contract``. A failure on one contract is
    logged and does not stop the remaining ones from being processed.

    TODO: Why fetching half-filled contracts?
    '''
    pending_contracts = LensDatabase().get_half_filled_contracts()
    log.info('%d half-filled contracts need to be synced',
             len(pending_contracts))

    for local_record in pending_contracts:
        try:
            document = self.client.documents.get(local_record.doc_cloud_id)
            self._match_contract(document)
        except Exception as error:
            # Best effort: log the failure and move on to the next contract.
            log.error(error, exc_info=True)
def match_local_database_to_document_cloud(self):
    '''
    Match our local database to our DocumentCloud project.

    Asks the local database for its half-filled contracts, fetches each one
    from DocumentCloud by its ``doc_cloud_id`` and hands the fetched
    document to ``self._match_contract``. A failure on one contract is
    logged and does not stop the remaining ones from being processed.

    TODO: Why fetching half-filled contracts?
    '''
    pending_contracts = LensDatabase().get_half_filled_contracts()
    log.info('%d half-filled contracts need to be synced',
             len(pending_contracts))

    for local_record in pending_contracts:
        try:
            document = self.client.documents.get(local_record.doc_cloud_id)
            self._match_contract(document)
        except Exception as error:
            # Best effort: log the failure and move on to the next contract.
            log.error(error, exc_info=True)
def _get_description(soup): ''' Find the description in the HTML. :param soup: A BeautifulSoup object for the contract page HTML. :type soup: BeautifulSoup object. :returns: string. The contract description on the city purchasing site. ''' try: main_table = soup.select('.table-01').pop() metadata_row = (main_table.findChildren(['tr'])[2].findChildren( ['td'])[0].findChildren(['table'])[0].findChildren(['tr'])) description = (metadata_row[1].findChildren( ['td'])[5].contents.pop().strip()) return str(description) except Exception as e: log.error(e, exc_info=True) return ""