Esempio n. 1
0
    def _update_contract_from_document_cloud(self, document_cloud_id, fields):
        """
        Overwrite a locally stored contract with DocumentCloud metadata.

        Looks up the Contract row whose ``doc_cloud_id`` matches, copies the
        metadata values onto it, then adds it to the session and commits.

        NOTE(review): the row is fetched with ``.first()``; if nothing
        matches, the result is presumably None and the assignments below
        would raise AttributeError -- confirm callers only pass known IDs.

        :param document_cloud_id: The unique ID in DocumentCloud.
        :type document_cloud_id: string
        :param fields: Metadata values keyed by 'contractno', 'vendor',
            'department', 'dateadded', 'title', 'purchaseno' and
            'description'.
        :type fields: dict
        """
        log.debug('Updating contract in database that has DocumentCloud ID %s',
                  document_cloud_id)

        matching_rows = SESSION.query(Contract).filter(
            Contract.doc_cloud_id == document_cloud_id)
        contract = matching_rows.first()

        # (Contract attribute, metadata key) pairs, applied in this order.
        attribute_sources = (
            ('contractnumber', 'contractno'),
            ('vendorid', 'vendor'),
            ('departmentid', 'department'),
            ('dateadded', 'dateadded'),
            ('title', 'title'),
            ('purchaseordernumber', 'purchaseno'),
            ('description', 'description'),
        )
        for attribute, field_key in attribute_sources:
            setattr(contract, attribute, fields[field_key])

        SESSION.add(contract)
        SESSION.commit()
    def prepare_then_add_contract(self, purchase_order_object):
        '''
        Prepare and upload every attachment of a purchase order.

        For each attachment, the numeric ID is pulled out of its 'href', the
        purchase order is passed through ``prepare_contract`` together with
        the attachment's index, and the matching PDF path under
        DOCUMENTS_DIR is handed to ``_upload_contract``.

        :param purchase_order_object: A PurchaseOrder object instance.
        '''

        attachments = purchase_order_object.attachments

        log.debug('There are %d attachments to upload', len(attachments))

        # With no attachments the loop body never runs, so nothing is
        # uploaded.
        for index, attachment in enumerate(attachments):
            id_match = re.search('[0-9]+', attachment.get('href'))
            pdf_path = '%s/%s.pdf' % (DOCUMENTS_DIR, id_match.group())

            # prepare_contract returns the object to use from here on.
            purchase_order_object = self.prepare_contract(
                purchase_order_object, index)
            self._upload_contract(pdf_path, purchase_order_object)
Esempio n. 3
0
    def __init__(self, purchase_order_number):
        '''
        Collect the details of one purchase order from its contract page.

        If the purchase order number fails the format check, only
        ``self.purchaseorder`` is set and initialization stops early.

        :param purchase_order_number: The purchase order number to look up.
        '''
        self.purchaseorder = purchase_order_number

        format_ok = Utilities().check_if_valid_purchase_order_format(
            self.purchaseorder)
        if format_ok is False:
            log.debug('Purchase order %s is invalid', self.purchaseorder)
            return

        page_html = self._get_html()

        # The vendor profile is downloaded before the vendor name is read.
        self.vendor_id_city = self._get_city_vendor_id(page_html)
        self._download_vendor_profile(self.vendor_id_city)

        page_soup = BeautifulSoup(page_html)
        self.description = self._get_description(page_soup)

        try:
            self.vendor_name = self._get_vendor_name()
        except IOError as error:
            # Fall back to a placeholder name when the lookup fails.
            log.error(error, exc_info=True)
            self.vendor_name = "unknown"
            log.info('No vendor info for purchase order %s',
                     self.purchaseorder)

        self.department = self._get_department(page_soup)
        self.k_number = self._get_knumber(page_soup)
        self.attachments = self._get_attachments(page_soup)
        self.data = self._get_data()
        self.title = "%s : %s" % (self.vendor_name, self.description)
Esempio n. 4
0
    def process_direct_hit(self, raw_html, vendor_name):
        """
        Record a vendor and link each officer listed in the page to it.

        The vendor name is stripped of surrounding newlines and of all
        periods before it is stored. Officers are read from the
        ".TableBorder" elements inside the element with ID
        "ctl00_cphContent_pnlOfficers"; the text of the first <span> in each
        is used as the officer's name.

        The agents panel ("ctl00_cphContent_pnlAgents") is not processed.

        :param raw_html: The HTML of the vendor's page.
        :param vendor_name: The vendor's name as found by the caller.
        """
        vendor_name = vendor_name.strip("\n").replace(".", "")
        log.debug("Adding vendor %s", vendor_name)
        self.add_vendor(vendor_name)

        soup = BeautifulSoup(raw_html)

        try:
            officers_panel = soup.find_all(
                id="ctl00_cphContent_pnlOfficers")[0]
            officers = officers_panel.select(".TableBorder")
        except IndexError:
            # Some places have no listed officers (ex. 311 networks).
            officers = []

        for officer in officers:
            span_texts = [span.text for span in officer.select("span")]
            officer_name = span_texts.pop(0)
            self.add_name(officer_name)
            self.link(officer_name, vendor_name)
Esempio n. 5
0
    def _get_city_vendor_id(html):
        '''
        Parses the contract page's HTML to find the vendor ID.

        :param html: The contract page's HTML.
        :type html: string
        :returns: string. The vendor ID, or an empty string if none is found.
        '''

        pattern = r"(?<=ExternalVendorProfile\(')\d+"
        vendor_ids = re.findall(pattern, html)

        if len(vendor_ids) == 0:
            log.error('No vendor ID found')
            vendor_id = ""
        else:
            # You need to take the first one for this list or you'll sometimes
            # end up w/ the vendor_id for a subcontractor, which will sometimes
            # end up on the vendor page.
            # http://www.purchasing.cityofno.com/bso/external/purchaseorder/
            # poSummary.sdo?docId=FC154683&releaseNbr=0&parentUrl=contract
            vendor_id = vendor_ids[0]
            log.debug('Vendor ID %s', vendor_id)

        return vendor_id
Esempio n. 6
0
    def _get_city_vendor_id(html):
        '''
        Parses the contract page's HTML to find the vendor ID.

        :param html: The contract page's HTML.
        :type html: string
        :returns: string. The vendor ID, or an empty string if none is found.
        '''

        pattern = r"(?<=ExternalVendorProfile\(')\d+"
        vendor_ids = re.findall(pattern, html)

        if len(vendor_ids) == 0:
            log.error('No vendor ID found')
            vendor_id = ""
        else:
            # You need to take the first one for this list or you'll sometimes
            # end up w/ the vendor_id for a subcontractor, which will sometimes
            # end up on the vendor page.
            # http://www.purchasing.cityofno.com/bso/external/purchaseorder/
            # poSummary.sdo?docId=FC154683&releaseNbr=0&parentUrl=contract
            vendor_id = vendor_ids[0]
            log.debug('Vendor ID %s', vendor_id)

        return vendor_id
Esempio n. 7
0
    def __init__(self, purchase_order_number):
        '''
        Collect the details of one purchase order from its contract page.

        If the purchase order number fails the format check, only
        ``self.purchaseorder`` is set and initialization stops early.

        :param purchase_order_number: The purchase order number to look up.
        '''
        self.purchaseorder = purchase_order_number

        format_ok = Utilities().check_if_valid_purchase_order_format(
            self.purchaseorder)
        if format_ok is False:
            log.debug('Purchase order %s is invalid', self.purchaseorder)
            return

        page_html = self._get_html()

        # The vendor profile is downloaded before the vendor name is read.
        self.vendor_id_city = self._get_city_vendor_id(page_html)
        self._download_vendor_profile(self.vendor_id_city)

        page_soup = BeautifulSoup(page_html)
        self.description = self._get_description(page_soup)

        try:
            self.vendor_name = self._get_vendor_name()
        except IOError as error:
            # Fall back to a placeholder name when the lookup fails.
            log.error(error, exc_info=True)
            self.vendor_name = "unknown"
            log.info('No vendor info for purchase order %s',
                     self.purchaseorder)

        self.department = self._get_department(page_soup)
        self.k_number = self._get_knumber(page_soup)
        self.attachments = self._get_attachments(page_soup)
        self.data = self._get_data()
        self.title = "%s : %s" % (self.vendor_name, self.description)
Esempio n. 8
0
    def _download_attachment(self, attachment):
        '''
        Download an attachment of a purchase order unless it is on disk.

        :param attachment: The attachment link. Its 'href' value is read
            with ``.get('href')`` and must contain the attachment's numeric
            ID.
        '''

        # The city's purchasing site has an internal ID for each attachment.
        # It is used both to request the file and to name the local copy,
        # so the files on disk double as a list of attachments on hand.
        href = attachment.get('href')
        city_attachment_id = re.search('[0-9]+', href).group()
        log.debug('Gathering data for attachment %s', city_attachment_id)

        document_path = '%s/%s.pdf' % (DOCUMENTS_DIR, city_attachment_id)

        # Looked up before the on-disk check, so it runs in both cases.
        display_name = self._get_attachment_display_name(city_attachment_id)

        if not os.path.isfile(document_path):
            self._download_attachment_file(
                city_attachment_id, display_name, document_path)
            return

        # Have already downloaded.
        log.info('Already have PDF for attachment %s', city_attachment_id)
Esempio n. 9
0
    def _find_number_of_pages(self):
        '''
        Finds how many pages of contracts there are on the city's
        purchasing site.

        Fetches index page 1, walks the nested layout tables under the last
        '.table-01' element to reach the row of page links, and reads the
        first run of digits from the last link's 'href'.

        NOTE(review): presumably the last link points to the final page --
        confirm against the live markup.

        :returns: int. The number of pages.
        '''

        html = self._get_index_page(1)
        soup = BeautifulSoup(html)

        main_table = soup.select('.table-01').pop()

        # Fourth direct <tr> of the main table ([3] is zero-based).
        outer_row = main_table.find_all('tr', recursive=False)[3]
        outer_cells = outer_row.findChildren(['td'])

        # Descend: first cell -> table -> first row -> first cell -> table
        # -> second row.
        metadata_row = outer_cells[0].findChildren(['table'])[0].findChildren(
            ['tr'])[0].findChildren(['td'])[0].findChildren(
                ['table'])[0].findChildren(['tr'])[1]

        href = metadata_row.findChildren(['td'])[0].findChildren(
            ['a'])[-1].get('href')

        # Convert before logging: the regex match is a string, and passing a
        # string to the '%d' placeholder makes the log record fail to format.
        number_of_pages = int(re.search('[0-9]+', href).group())

        log.debug("There were %d pages found on the city's purchasing portal",
                  number_of_pages)

        return number_of_pages
Esempio n. 10
0
    def process_direct_hit(self, raw_html, vendor_name):
        """
        Record a vendor and link each officer listed in the page to it.

        The vendor name is stripped of surrounding newlines and of all
        periods before it is stored. Officers are read from the
        ".TableBorder" elements inside the element with ID
        "ctl00_cphContent_pnlOfficers"; the text of the first <span> in each
        is used as the officer's name.

        The agents panel ("ctl00_cphContent_pnlAgents") is not processed.

        :param raw_html: The HTML of the vendor's page.
        :param vendor_name: The vendor's name as found by the caller.
        """
        vendor_name = vendor_name.strip("\n").replace(".", "")
        log.debug("Adding vendor %s", vendor_name)
        self.add_vendor(vendor_name)

        soup = BeautifulSoup(raw_html)

        try:
            officers_panel = soup.find_all(
                id="ctl00_cphContent_pnlOfficers")[0]
            officers = officers_panel.select(".TableBorder")
        except IndexError:
            # Some places have no listed officers (ex. 311 networks).
            officers = []

        for officer in officers:
            span_texts = [span.text for span in officer.select("span")]
            officer_name = span_texts.pop(0)
            self.add_name(officer_name)
            self.link(officer_name, vendor_name)
Esempio n. 11
0
    def check_pages(self):
        '''
        Scans a random selection of index pages on the city's purchasing
        website.

        Pages 1-10 are treated as "new" and pages 11 through the last page
        as "old". Both groups are shuffled. Pages that LensDatabase reports
        as not needing a scrape are skipped. Each run scans at most 2 new
        pages and at most 13 old pages, pausing 10 seconds after each
        scanned page, and records every scanned page in the scrape log.
        '''

        number_of_pages = self._find_number_of_pages()

        # shuffle() works in place and needs a mutable sequence. On Python 3
        # range() is not a list, so build real lists explicitly.
        new_pages = list(range(1, 11))
        old_pages = list(range(11, number_of_pages + 1))

        shuffle(new_pages)
        shuffle(old_pages)

        new_counter = 0
        for new_page in new_pages:
            log.debug('New page %d', new_page)

            need_to_scrape = LensDatabase().check_if_need_to_scrape(new_page)

            if need_to_scrape is False:
                continue

            self._scan_index_page(new_page)

            LensDatabase().update_scrape_log(new_page)
            new_counter += 1

            # Run five times per day, so break after 2 pages in order to reach
            # 10 pages per day.
            if new_counter == 2:
                break
            time.sleep(10)

        old_counter = 0
        for old_page in old_pages:
            log.debug('Old page %s', old_page)

            need_to_scrape = LensDatabase().check_if_need_to_scrape(old_page)

            if need_to_scrape is False:
                continue

            self._scan_index_page(old_page)

            LensDatabase().update_scrape_log(old_page)
            old_counter += 1

            # Run five times per day, seven days per week, so break after 13
            # pages in order to reach about 450 pages per week.
            if old_counter == 13:
                break
            time.sleep(10)
Esempio n. 12
0
    def check_pages(self):
        '''
        Scans a random selection of index pages on the city's purchasing
        website.

        Pages 1-10 are treated as "new" and pages 11 through the last page
        as "old". Both groups are shuffled. Pages that LensDatabase reports
        as not needing a scrape are skipped. Each run scans at most 2 new
        pages and at most 13 old pages, pausing 10 seconds after each
        scanned page, and records every scanned page in the scrape log.
        '''

        number_of_pages = self._find_number_of_pages()

        # shuffle() works in place and needs a mutable sequence. On Python 3
        # range() is not a list, so build real lists explicitly.
        new_pages = list(range(1, 11))
        old_pages = list(range(11, number_of_pages + 1))

        shuffle(new_pages)
        shuffle(old_pages)

        new_counter = 0
        for new_page in new_pages:
            log.debug('New page %d', new_page)

            need_to_scrape = LensDatabase().check_if_need_to_scrape(new_page)

            if need_to_scrape is False:
                continue

            self._scan_index_page(new_page)

            LensDatabase().update_scrape_log(new_page)
            new_counter += 1

            # Run five times per day, so break after 2 pages in order to reach
            # 10 pages per day.
            if new_counter == 2:
                break
            time.sleep(10)

        old_counter = 0
        for old_page in old_pages:
            log.debug('Old page %s', old_page)

            need_to_scrape = LensDatabase().check_if_need_to_scrape(old_page)

            if need_to_scrape is False:
                continue

            self._scan_index_page(old_page)

            LensDatabase().update_scrape_log(old_page)
            old_counter += 1

            # Run five times per day, seven days per week, so break after 13
            # pages in order to reach about 450 pages per week.
            if old_counter == 13:
                break
            time.sleep(10)
Esempio n. 13
0
    def _add_department(self, department):
        """
        Store a new department in the local database and commit.

        :param department: The department to add to local database.
        :type department: string
        """
        log.debug('Adding department "%s" to database', department)

        department_row = Department(department)
        SESSION.add(department_row)
        SESSION.commit()
Esempio n. 14
0
    def _add_vendor(self, vendor, vendor_id_city=None):
        """
        Store a new vendor in the local database and commit.

        :param vendor: The vendor to add to our database.
        :type vendor: string
        :param vendor_id_city: Passed through to the Vendor constructor as
            its second argument. Defaults to None.
        """
        log.debug('Adding vendor "%s" to database', vendor)

        SESSION.add(Vendor(vendor, vendor_id_city))
        SESSION.commit()
Esempio n. 15
0
    def _get_database_vendor_id(self, vendor):
        """
        Look up the local database ID of the vendor with this name.

        NOTE(review): the row is fetched with ``.first()``; if no vendor
        matches, the result is presumably None and reading ``.id`` would
        raise AttributeError -- confirm the vendor is always added first.

        :param vendor: The vendor name.
        :type vendor: string
        :returns: The ``id`` attribute of the first matching Vendor row.
        """
        log.debug('Fetching database ID for vendor "%s"', vendor)

        name_matches = SESSION.query(Vendor).filter(Vendor.name == vendor)
        vendor_row = name_matches.first()

        SESSION.close()

        return vendor_row.id
Esempio n. 16
0
    def _get_department_id(self, department):
        """
        Look up the local database ID of the department with this name.

        NOTE(review): the row is fetched with ``.first()``; if no department
        matches, the result is presumably None and reading ``.id`` would
        raise AttributeError -- confirm the department is always added
        first.

        :param department: The department name.
        :type department: string
        :returns: The ``id`` attribute of the first matching Department row.
        """
        log.debug('Finding ID for department "%s" in database', department)

        name_matches = SESSION.query(Department).filter(
            Department.name == department)
        department_row = name_matches.first()

        SESSION.close()

        return department_row.id
Esempio n. 17
0
    def _get_contract_doc_cloud_id(self, document_cloud_id):
        """
        Fetch the local Contract row that has this DocumentCloud ID.

        The lookup runs against the local database session, not against
        DocumentCloud itself.

        :param document_cloud_id: The unique ID in the DocumentCloud project.
        :type document_cloud_id: string
        :returns: The result of ``.first()`` on the Contract query
            (presumably a Contract instance, or None when nothing matches).
        """
        log.debug('Find contract in database that has DocumentCloud ID %s',
                  document_cloud_id)

        id_matches = SESSION.query(Contract).filter(
            Contract.doc_cloud_id == document_cloud_id)
        contract = id_matches.first()

        SESSION.close()

        return contract
Esempio n. 18
0
    def _check_if_need_to_download_contract(purchase_order_number):
        '''
        Run one purchase order through every storage step, in order: the
        local file repository, the attachment download, the DocumentCloud
        project and the local database. Each step only acts when its own
        check says the purchase order is missing there.

        An HTTPError in the repository, DocumentCloud or database step is
        logged and the next step still runs. An IndexError while building
        the PurchaseOrder or downloading its attachments is logged and ends
        processing for this purchase order.

        :param purchase_order_number: The contract's purchase order number.
        :type purchase_order_number: string
        '''

        log.info('Checking purchase order %s', purchase_order_number)

        # Step 1: local file repository.
        try:
            log.debug('LensRepository')

            repository_check = LensRepository(purchase_order_number)
            if repository_check.check_if_need_to_download():
                LensRepository(purchase_order_number).download_purchase_order()
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 2: build the purchase order and fetch its attachments.
        try:
            log.debug('PurchaseOrder')

            purchase_order_object = PurchaseOrder(purchase_order_number)
            purchase_order_object.download_attachments()
        except IndexError:
            log.exception(purchase_order_number)
            return

        # Step 3: DocumentCloud project.
        try:
            log.debug('DocumentCloudProject')

            if DocumentCloudProject().check_if_need_to_upload(
                    purchase_order_number):
                DocumentCloudProject().prepare_then_add_contract(
                    purchase_order_object)
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 4: local database.
        try:
            log.debug('LensDatabase')

            already_stored = LensDatabase().check_if_database_has_contract(
                purchase_order_number)
            if already_stored is False:
                LensDatabase().add_to_database(purchase_order_object)
        except urllib2.HTTPError:
            log.exception('Purchase order %s is not posted publically.',
                          purchase_order_number)
Esempio n. 19
0
    def _check_if_vendor_exists(self, vendor):
        """
        Report whether the database has a vendor with this name.

        :param vendor: The vendor name to check for.
        :returns: boolean. True if vendor exists in database, False if not.
        """
        name_matches = SESSION.query(Vendor).filter(Vendor.name == vendor)
        number_found = name_matches.count()

        SESSION.close()

        if number_found != 0:
            return True

        log.debug('Vendor "%s" is missing from database', vendor)
        return False
Esempio n. 20
0
    def _scan_index_page(self, page_number):
        '''
        Check every purchase order listed on one index page.

        Each purchase order number found on the page is passed to
        ``_check_if_need_to_download_contract``, with a two-second pause
        after each one.

        :param page_number: The page to check on the city's website.
        '''

        index_html = self._get_index_page(page_number)
        order_numbers = self._get_purchase_order_numbers(index_html)

        for position, order_number in enumerate(order_numbers, 1):
            log.debug('Purchase order %s', order_number)
            log.debug('(%d of %d)', position, len(order_numbers))

            self._check_if_need_to_download_contract(order_number)
            time.sleep(2)
Esempio n. 21
0
    def _check_if_need_to_download_contract(purchase_order_number):
        '''
        Run one purchase order through every storage step, in order: the
        local file repository, the attachment download, the DocumentCloud
        project and the local database. Each step only acts when its own
        check says the purchase order is missing there.

        An HTTPError in the repository, DocumentCloud or database step is
        logged and the next step still runs. An IndexError while building
        the PurchaseOrder or downloading its attachments is logged and ends
        processing for this purchase order.

        :param purchase_order_number: The contract's purchase order number.
        :type purchase_order_number: string
        '''

        log.info('Checking purchase order %s', purchase_order_number)

        # Step 1: local file repository.
        try:
            log.debug('LensRepository')

            repository_check = LensRepository(purchase_order_number)
            if repository_check.check_if_need_to_download():
                LensRepository(purchase_order_number).download_purchase_order()
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 2: build the purchase order and fetch its attachments.
        try:
            log.debug('PurchaseOrder')

            purchase_order_object = PurchaseOrder(purchase_order_number)
            purchase_order_object.download_attachments()
        except IndexError:
            log.exception(purchase_order_number)
            return

        # Step 3: DocumentCloud project.
        try:
            log.debug('DocumentCloudProject')

            if DocumentCloudProject().check_if_need_to_upload(
                    purchase_order_number):
                DocumentCloudProject().prepare_then_add_contract(
                    purchase_order_object)
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 4: local database.
        try:
            log.debug('LensDatabase')

            already_stored = LensDatabase().check_if_database_has_contract(
                purchase_order_number)
            if already_stored is False:
                LensDatabase().add_to_database(purchase_order_object)
        except urllib2.HTTPError:
            log.exception('Purchase order %s is not posted publically.',
                          purchase_order_number)
Esempio n. 22
0
    def _scan_index_page(self, page_number):
        '''
        Check every purchase order listed on one index page.

        Each purchase order number found on the page is passed to
        ``_check_if_need_to_download_contract``, with a two-second pause
        after each one.

        :param page_number: The page to check on the city's website.
        '''

        index_html = self._get_index_page(page_number)
        order_numbers = self._get_purchase_order_numbers(index_html)

        for position, order_number in enumerate(order_numbers, 1):
            log.debug('Purchase order %s', order_number)
            log.debug('(%d of %d)', position, len(order_numbers))

            self._check_if_need_to_download_contract(order_number)
            time.sleep(2)
Esempio n. 23
0
    def _check_if_department_exists(self, department):
        """
        Report whether the database has a department with this name.

        :param department: The department name to check for.
        :returns: boolean. True if it exists in the database, False if not.
        """
        name_matches = SESSION.query(Department).filter(
            Department.name == department)
        number_found = name_matches.count()

        SESSION.close()

        if number_found != 0:
            return True

        log.debug('Department "%s" is missing from database', department)
        return False
Esempio n. 24
0
    def _find_number_of_pages(self):
        '''
        Finds how many pages of contracts there are on the city's
        purchasing site.

        Fetches index page 1, walks the nested layout tables under the last
        '.table-01' element to reach the row of page links, and reads the
        first run of digits from the last link's 'href'.

        NOTE(review): presumably the last link points to the final page --
        confirm against the live markup.

        :returns: int. The number of pages.
        '''

        html = self._get_index_page(1)
        soup = BeautifulSoup(html)

        main_table = soup.select('.table-01').pop()

        # Fourth direct <tr> of the main table ([3] is zero-based).
        outer_row = main_table.find_all('tr', recursive=False)[3]
        outer_cells = outer_row.findChildren(['td'])

        # Descend: first cell -> table -> first row -> first cell -> table
        # -> second row.
        metadata_row = outer_cells[0].findChildren(
            ['table']
        )[0].findChildren(
            ['tr']
        )[0].findChildren(
            ['td']
        )[0].findChildren(
            ['table']
        )[0].findChildren(
            ['tr']
        )[1]

        href = metadata_row.findChildren(
            ['td']
        )[0].findChildren(
            ['a']
        )[-1].get('href')

        # Convert before logging: the regex match is a string, and passing a
        # string to the '%d' placeholder makes the log record fail to format.
        number_of_pages = int(re.search('[0-9]+', href).group())

        log.debug("There were %d pages found on the city's purchasing portal",
                  number_of_pages)

        return number_of_pages
Esempio n. 25
0
    def check_if_need_to_download(self):
        '''
        Decide whether the purchase order's HTML page still has to be
        downloaded.

        A download is needed only when the validity check does not return
        False and no '<purchase order number>.html' file exists under
        PURCHASE_ORDER_DIR.

        :returns: boolean. True if need to download, False if don't need to.
        '''

        # Check if contract has valid format and is public.
        is_valid = Utilities().check_that_contract_is_valid_and_public(
            self.purchase_order_number)

        html_path = '%s/%s.html' % (
            PURCHASE_ORDER_DIR, self.purchase_order_number)
        already_on_disk = os.path.isfile(html_path)

        if is_valid is not False and not already_on_disk:
            return True

        log.debug("Don't download. Contract is invalid, private or we "
                  "already the HTML")
        return False  # Don't download
Esempio n. 26
0
    def _upload_contract(self, filename, purchase_order_object):
        '''
        Upload one contract file to our DocumentCloud project.

        Nothing is uploaded when ``_check_if_contract_number_is_null``
        returns a true value for the purchase order. Otherwise every "/" is
        removed from the purchase order's title (the object is modified in
        place) and the file is uploaded as a public document with the
        purchase order's title, description and data.

        :param filename: The path to the downloaded contract PDF file (?).
        :type filename: string
        :param purchase_order_object: Supplies the title, description,
            purchase order number and data for the upload.
        '''

        log.debug('Uploading purchase order %s to DocumentCloud', filename)

        if self._check_if_contract_number_is_null(purchase_order_object):
            return

        # Not sure why this is necessary
        slash_free_title = purchase_order_object.title.replace("/", "")
        purchase_order_object.title = slash_free_title

        purchase_order_number = str(purchase_order_object.purchaseorder)
        title = str(purchase_order_object.title)

        log.debug('Uploading purchase order %s ("%s") to DocumentCloud...',
                  purchase_order_number, title)

        # Positional arguments, in order: file, title, source, description,
        # related article, published URL, access, project, data, secure.
        documents = self.api_connection.documents
        documents.upload(
            filename,
            title,
            'City of New Orleans',
            purchase_order_object.description,
            None,
            PROJECT_URL,
            'public',
            self.project_id,
            purchase_order_object.data,
            False
        )
Esempio n. 27
0
    def check_if_need_to_download(self):
        '''
        Decide whether the purchase order's HTML page still has to be
        downloaded.

        A download is needed only when the validity check does not return
        False and no '<purchase order number>.html' file exists under
        PURCHASE_ORDER_DIR.

        :returns: boolean. True if need to download, False if don't need to.
        '''

        # Check if contract has valid format and is public.
        is_valid = Utilities().check_that_contract_is_valid_and_public(
            self.purchase_order_number)

        html_path = '%s/%s.html' % (
            PURCHASE_ORDER_DIR, self.purchase_order_number)
        already_on_disk = os.path.isfile(html_path)

        if is_valid is not False and not already_on_disk:
            return True

        log.debug("Don't download. Contract is invalid, private or we "
                  "already the HTML")
        return False  # Don't download
Esempio n. 28
0
    def _check_when_last_scraped(self, page):
        """
        Look up this page in scrape_log table to see when it was last scraped.

        :params page: The purchasing site's page to check.
        :type page: int.
        :returns: date. When this page was last scraped. None if never.
        """
        rows = SESSION.query(ScrapeLog).filter(ScrapeLog.page == page).all()

        # Close the session before branching, so the "no row" path below
        # does not return with the session left open.
        SESSION.close()

        if not rows:  # No row yet for this page (total number varies)
            return None

        # If several rows match, the last one returned is used.
        date_last_scraped = rows.pop().last_scraped

        log.debug('This page was last scraped %s',
                  date_last_scraped.strftime('%Y-%m-%d'))

        return date_last_scraped
Esempio n. 29
0
    def check_if_need_to_upload(self, purchase_order_number):
        '''
        Decide whether this contract still has to be uploaded to the
        DocumentCloud project.

        An upload is needed only when the validity check does not return
        False and the project has no document for this purchase order.

        :param purchase_order_number: The contract's purchase order number.
        :type purchase_order_number: string.
        :returns: boolean. True if need to upload, False if don't need to.
        '''

        is_valid = Utilities().check_that_contract_is_valid_and_public(
            purchase_order_number)

        already_uploaded = self._check_if_document_cloud_has_contract(
            "purchase order", purchase_order_number)

        if is_valid is not False and not already_uploaded:
            return True

        log.debug('Not uploading to DocumentCloud')
        log.debug('Purchase order %s is invalid or already there',
                  purchase_order_number)
        return False
Esempio n. 30
0
    def add_to_database(self, purchase_order_object):
        """
        Store this purchase order as a new contract in the local database.

        A fresh Contract is filled from the purchase order and stamped with
        today's date. The department and vendor are added first if they are
        missing, so that their database IDs can be looked up and stored on
        the contract.

        :param purchase_order_object: The PurchaseOrder object instance.
        :type purchase_order_object: A PurchaseOrder object instance.
        """
        log.debug("Adding purchase order %s to contracts table",
                  purchase_order_object.purchaseorder)

        department_name = purchase_order_object.department
        vendor_name = purchase_order_object.vendor_name

        new_contract = Contract()

        # TODO: Might need to have a follow-up method that pulls from
        # DocumentCloud project and inserts its ID into this row in the
        # database (doc_cloud_id is not set here).

        new_contract.contractnumber = purchase_order_object.k_number
        new_contract.purchaseordernumber = purchase_order_object.purchaseorder
        new_contract.description = purchase_order_object.description
        new_contract.title = purchase_order_object.title
        new_contract.dateadded = date.today()

        # Make sure both rows exist before their IDs are looked up.
        self._add_department_if_missing(department_name)
        self._add_vendor_if_missing(
            vendor_name,
            vendor_id_city=purchase_order_object.vendor_id_city)

        new_contract.departmentid = self._get_department_id(department_name)
        new_contract.vendorid = self._get_database_vendor_id(vendor_name)

        self._add_contract_to_local_database(new_contract)
Esempio n. 31
0
    def _download_attachment(self, attachment):
        '''
        Download an attachment of a purchase order unless it is on disk.

        :param attachment: The attachment link. Its 'href' value is read
            with ``.get('href')`` and must contain the attachment's numeric
            ID.
        '''

        # The city's purchasing site has an internal ID for each attachment.
        # It is used both to request the file and to name the local copy,
        # so the files on disk double as a list of attachments on hand.
        href = attachment.get('href')
        city_attachment_id = re.search('[0-9]+', href).group()
        log.debug('Gathering data for attachment %s', city_attachment_id)

        document_path = '%s/%s.pdf' % (DOCUMENTS_DIR, city_attachment_id)

        # Looked up before the on-disk check, so it runs in both cases.
        display_name = self._get_attachment_display_name(city_attachment_id)

        if not os.path.isfile(document_path):
            self._download_attachment_file(
                city_attachment_id, display_name, document_path)
            return

        # Have already downloaded.
        log.info('Already have PDF for attachment %s', city_attachment_id)
Esempio n. 32
0
    def _download_attachment_file(self,
                                  attachment_id,
                                  display_name,
                                  document_file_path):
        '''
        Download the attachment file found on contract page.

        Runs curl to replay a browser form submission: a multipart POST to
        attachmentFileDetail.sdo whose fields identify the file by
        ``attachment_id`` and the purchase order by ``self.purchaseorder``.
        The response is written to ``document_file_path``. Nothing is
        requested when that path already exists.

        :param attachment_id: The city's internal attachment ID.
        :type attachment_id: string
        :param display_name: The attachment's display name, sent as the \
                             "displayName" form field.
        :type display_name: string
        :param document_file_path: The path for where to save the \
                                   attachment file.
        :type document_file_path: string
        '''
        log.debug('Saving PDF for attachment "%s" with city ID %s',
                  display_name, attachment_id)

        # The guard must look at the output path. Testing the attachment ID
        # as if it were a path never detects an existing download.
        if os.path.exists(document_file_path):
            return

        endpoint = (
            'http://www.purchasing.cityofno.com/bso/external/document/'
            'attachments/attachmentFileDetail.sdo')

        # The multipart delimiter is the declared boundary prefixed by "--".
        boundary = '----WebKitFormBoundaryGAY56ngXMDvs6qDP'
        delimiter = '--' + boundary

        referer = (
            'Referer: ' + endpoint +
            '?fileNbr=%s&docId=%s' % (attachment_id, self.purchaseorder) +
            '&docType=P&releaseNbr=0&parentUrl=/external/purchaseorder/'
            'poSummary.sdo&external=true')

        # NOTE(review): the session cookie is hard-coded, presumably copied
        # from a browser session. Verify that the site still accepts it.
        headers = [
            'Pragma: no-cache',
            'Origin: http://www.purchasing.cityofno.com',
            'Accept-Encoding: gzip, deflate',
            'Accept-Language: en-US,en;q=0.8',
            ('User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) '
             'AppleWebKit/537.36 (KHTML, like Gecko) '
             'Chrome/43.0.2357.81 Safari/537.36'),
            'Content-Type: multipart/form-data; boundary=' + boundary,
            ('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,'
             'image/webp,*/*;q=0.8'),
            'Cache-Control: no-cache',
            referer,
            'Cookie: JSESSIONID=5FC84DA3EC020E1FC19700761C0EBEB3',
            'Connection: keep-alive',
        ]

        # Form fields in the exact order the request sends them.
        form_fields = [
            ('mode', 'download'),
            ('parentUrl', '/external/purchaseorder/poSummary.sdo'),
            ('parentId', ''),
            ('fileNbr', attachment_id),
            ('workingDir', ''),
            ('docId', self.purchaseorder),
            ('docType', 'P'),
            ('docSubType', ''),
            ('releaseNbr', '0'),
            ('downloadFileNbr', attachment_id),
            ('itemNbr', '0'),
            ('currentPage', '1'),
            ('querySql', ''),
            ('sortBy', ''),
            ('sortByIndex', '0'),
            ('sortByDescending', 'false'),
            ('revisionNbr', '0'),
            ('receiptId', ''),
            ('vendorNbr', ''),
            ('vendorGrp', ''),
            ('invoiceNbr', ''),
            ('displayName', display_name),
        ]
        parts = [
            '%s\r\nContent-Disposition: form-data; name="%s"\r\n\r\n%s\r\n'
            % (delimiter, field_name, field_value)
            for field_name, field_value in form_fields
        ]

        # NOTE(review): the leading "$'" and trailing "'" look like leftover
        # shell quoting. Because the command is an argument list, they are
        # sent as literal characters. They are kept so the payload stays
        # byte-for-byte the same; confirm whether they can be dropped.
        body = "$'" + ''.join(parts) + delimiter + "--\r\n'"

        # TODO: convert to Python
        curl_command = ['curl', '-s', '-o', document_file_path, endpoint]
        for header in headers:
            curl_command.extend(['-H', header])
        curl_command.extend(['--data-binary', body, '--compressed'])

        exit_code = call(curl_command)
        if exit_code:
            # Surface failed transfers instead of ignoring curl's status.
            log.error('curl exited with status %s for attachment %s',
                      exit_code, attachment_id)
Esempio n. 33
0
    def _download_attachment_file(self, attachment_id, display_name,
                                  document_file_path):
        '''
        Download the attachment file found on contract page.

        Runs curl to replay a browser form submission: a multipart POST to
        attachmentFileDetail.sdo whose fields identify the file by
        ``attachment_id`` and the purchase order by ``self.purchaseorder``.
        The response is written to ``document_file_path``. Nothing is
        requested when that path already exists.

        :param attachment_id: The city's internal attachment ID.
        :type attachment_id: string
        :param display_name: The attachment's display name, sent as the \
                             "displayName" form field.
        :type display_name: string
        :param document_file_path: The path for where to save the \
                                   attachment file.
        :type document_file_path: string
        '''
        log.debug('Saving PDF for attachment "%s" with city ID %s',
                  display_name, attachment_id)

        # The guard must look at the output path. Testing the attachment ID
        # as if it were a path never detects an existing download.
        if os.path.exists(document_file_path):
            return

        endpoint = (
            'http://www.purchasing.cityofno.com/bso/external/document/'
            'attachments/attachmentFileDetail.sdo')

        # The multipart delimiter is the declared boundary prefixed by "--".
        boundary = '----WebKitFormBoundaryGAY56ngXMDvs6qDP'
        delimiter = '--' + boundary

        referer = (
            'Referer: ' + endpoint +
            '?fileNbr=%s&docId=%s' % (attachment_id, self.purchaseorder) +
            '&docType=P&releaseNbr=0&parentUrl=/external/purchaseorder/'
            'poSummary.sdo&external=true')

        # NOTE(review): the session cookie is hard-coded, presumably copied
        # from a browser session. Verify that the site still accepts it.
        headers = [
            'Pragma: no-cache',
            'Origin: http://www.purchasing.cityofno.com',
            'Accept-Encoding: gzip, deflate',
            'Accept-Language: en-US,en;q=0.8',
            ('User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) '
             'AppleWebKit/537.36 (KHTML, like Gecko) '
             'Chrome/43.0.2357.81 Safari/537.36'),
            'Content-Type: multipart/form-data; boundary=' + boundary,
            ('Accept: text/html,application/xhtml+xml,application/xml;q=0.9,'
             'image/webp,*/*;q=0.8'),
            'Cache-Control: no-cache',
            referer,
            'Cookie: JSESSIONID=5FC84DA3EC020E1FC19700761C0EBEB3',
            'Connection: keep-alive',
        ]

        # Form fields in the exact order the request sends them.
        form_fields = [
            ('mode', 'download'),
            ('parentUrl', '/external/purchaseorder/poSummary.sdo'),
            ('parentId', ''),
            ('fileNbr', attachment_id),
            ('workingDir', ''),
            ('docId', self.purchaseorder),
            ('docType', 'P'),
            ('docSubType', ''),
            ('releaseNbr', '0'),
            ('downloadFileNbr', attachment_id),
            ('itemNbr', '0'),
            ('currentPage', '1'),
            ('querySql', ''),
            ('sortBy', ''),
            ('sortByIndex', '0'),
            ('sortByDescending', 'false'),
            ('revisionNbr', '0'),
            ('receiptId', ''),
            ('vendorNbr', ''),
            ('vendorGrp', ''),
            ('invoiceNbr', ''),
            ('displayName', display_name),
        ]
        parts = [
            '%s\r\nContent-Disposition: form-data; name="%s"\r\n\r\n%s\r\n'
            % (delimiter, field_name, field_value)
            for field_name, field_value in form_fields
        ]

        # NOTE(review): the leading "$'" and trailing "'" look like leftover
        # shell quoting. Because the command is an argument list, they are
        # sent as literal characters. They are kept so the payload stays
        # byte-for-byte the same; confirm whether they can be dropped.
        body = "$'" + ''.join(parts) + delimiter + "--\r\n'"

        # TODO: convert to Python
        curl_command = ['curl', '-s', '-o', document_file_path, endpoint]
        for header in headers:
            curl_command.extend(['-H', header])
        curl_command.extend(['--data-binary', body, '--compressed'])

        exit_code = call(curl_command)
        if exit_code:
            # Surface failed transfers instead of ignoring curl's status.
            log.error('curl exited with status %s for attachment %s',
                      exit_code, attachment_id)