Ejemplo n.º 1
0
 def try_to_link(self, vendor_name):
     """
     Search for the vendor and process the result when it is unambiguous.

     Runs ``search_sos`` for ``vendor_name`` and, only when
     ``get_total_hits`` reports exactly one hit, passes the search results
     to ``process_direct_hit``. Any other hit count is ignored.

     :param vendor_name: The vendor name to search for.
     """
     results = self.search_sos(vendor_name)

     if self.get_total_hits(results) != 1:
         return  # Zero or several hits: nothing is processed.

     log.info("Perfect match for %s", vendor_name)
     self.process_direct_hit(results, vendor_name)
    def _match_contract(self, document):
        '''
        Copy one DocumentCloud document's metadata into the local database.

        Reads the purchase order, contract number and vendor values through
        ``_get_metadata``, registers the department and vendor names with
        ``LensDatabase``, replaces both names with the IDs the database
        returns, and finally updates the contract stored for ``document.id``.

        :params document: A Python-DocumentCloud object representing a contract
        :type document: Python-DocumentCloud object.
        '''
        log.info('Syncing document %s', document.id)

        # NOTE(review): 'department' is read from the "vendor" metadata key,
        # exactly like 'vendor'. This looks like a copy-paste slip -- confirm
        # which key the department is stored under before changing it.
        fields = {
            'purchaseno': self._get_metadata(document, "purchase order"),
            'contractno': self._get_metadata(document, "contract number"),
            'vendor': self._get_metadata(
                document, "vendor").replace(".", ""),
            'department': self._get_metadata(
                document, "vendor").replace(".", ""),
            'dateadded': document.created_at,
            'title': document.title,
            'description': document.description,
        }

        department_name = fields['department']
        vendor_name = fields['vendor']

        # Register the names first so the ID lookups below can find them.
        LensDatabase().add_department(department_name)
        LensDatabase().add_vendor(vendor_name)

        # The contract record stores IDs rather than names.
        fields['department'] = LensDatabase().get_department_id(
            department_name)
        fields['vendor'] = LensDatabase().get_lens_vendor_id(vendor_name)

        LensDatabase().update_contract_from_document_cloud(document.id, fields)
Ejemplo n.º 3
0
    def _get_attachment_display_name(self, city_attachment_id):
        '''
        Fetch an attachment's detail page and return its display name.

        Downloads the attachment detail page from the city's purchasing site,
        saves the raw HTML as ``<ATTACHMENTS_DIR>/<city_attachment_id>.html``
        (creating the directory when it is missing), and reads the name from
        the first ".sectionheader-01" element of the page.

        :param city_attachment_id: The attachment ID sent as ``fileNbr``.
        :returns: string. The header text with the
            "Attachment File Detail:" label removed.
        '''
        url_template = (
            'http://www.purchasing.cityofno.com/bso/external/document/'
            'attachments/attachmentFileDetail.sdo?'
            'fileNbr=%s'
            '&docId=%s'
            '&docType=P&releaseNbr=0&parentUrl=/external/purchaseorder/'
            'poSummary.sdo&external=true'
        )
        url = url_template % (city_attachment_id, self.purchaseorder)
        html = urllib2.urlopen(url).read()

        file_location = '%s/%s.html' % (ATTACHMENTS_DIR, city_attachment_id)
        attachments_dir = os.path.dirname(file_location)

        if not os.path.exists(attachments_dir):
            os.makedirs(attachments_dir)

        with open(file_location, 'w') as html_file:
            log.info('Saving HTML for attachment %s', city_attachment_id)
            html_file.write(html)

        soup = BeautifulSoup(html)
        raw_header = soup.select(".sectionheader-01")[0].contents.pop()

        # Collapse every run of whitespace into a single space.
        normalized_header = ' '.join(raw_header.split())

        return str(normalized_header).replace(
            "Attachment File Detail:", "").strip()
    def _match_contract(self, document):
        '''
        Copy one DocumentCloud document's metadata into the local database.

        Reads the purchase order, contract number and vendor values through
        ``_get_metadata``, registers the department and vendor names with
        ``LensDatabase``, replaces both names with the IDs the database
        returns, and finally updates the contract stored for ``document.id``.

        :params document: A Python-DocumentCloud object representing a contract
        :type document: Python-DocumentCloud object.
        '''
        log.info('Syncing document %s', document.id)

        # NOTE(review): 'department' is read from the "vendor" metadata key,
        # exactly like 'vendor'. This looks like a copy-paste slip -- confirm
        # which key the department is stored under before changing it.
        fields = {
            'purchaseno': self._get_metadata(document, "purchase order"),
            'contractno': self._get_metadata(document, "contract number"),
            'vendor': self._get_metadata(
                document, "vendor").replace(".", ""),
            'department': self._get_metadata(
                document, "vendor").replace(".", ""),
            'dateadded': document.created_at,
            'title': document.title,
            'description': document.description,
        }

        department_name = fields['department']
        vendor_name = fields['vendor']

        # Register the names first so the ID lookups below can find them.
        LensDatabase().add_department(department_name)
        LensDatabase().add_vendor(vendor_name)

        # The contract record stores IDs rather than names.
        fields['department'] = LensDatabase().get_department_id(
            department_name)
        fields['vendor'] = LensDatabase().get_lens_vendor_id(vendor_name)

        LensDatabase().update_contract_from_document_cloud(document.id, fields)
Ejemplo n.º 5
0
    def _get_vendor_name(self):
        '''
        Read the vendor's name from the saved vendor profile HTML.

        Opens ``<VENDORS_DIR>/<self.vendor_id_city>.html``, finds the
        "Company Name:" label and takes the last content node of the sixth
        <td> found under that label's grandparent element.

        :returns: string. The contract vendor's name, in uppercase.
        :raises IOError: if the vendor HTML file cannot be opened.
        '''
        vendor_file_location = '%s/%s.html' % (VENDORS_DIR,
                                               self.vendor_id_city)

        with open(vendor_file_location, 'r') as vendor_file:
            log.info('Reading HTML for vendor %s', self.vendor_id_city)
            html = vendor_file.read()

        soup = BeautifulSoup(html)

        label = soup(text='Company Name:')[0]
        vendor_row = label.parent.parent
        name_cell = vendor_row.findChildren(['td'])[5]
        vendor_name = name_cell.contents.pop().strip()

        # Convert to uppercase for DocumentCloud project metadata.
        # Search queries are also converted to uppercase.
        return vendor_name.upper()
Ejemplo n.º 6
0
    def _backup(self, document_cloud_id):
        '''
        Write local backup files for one DocumentCloud document.

        Nothing is fetched unless ``_needs_to_be_backed_up`` says so or
        ``self.force`` is set. Otherwise the document is retrieved and up to
        three files are written at the paths given by ``_get_path``: the PDF
        (".pdf"), the metadata as JSON (".txt") and the full text as JSON
        ("_text.txt"). An existing file is only rewritten when
        ``self.force`` is set.

        :param document_cloud_id: The DocumentCloud ID of the document.
        '''
        if not (self._needs_to_be_backed_up(document_cloud_id) or self.force):
            log.info("%s is already is backed up", document_cloud_id)
            return

        log.info("Creating backup for %s", document_cloud_id)

        def must_write(path):
            '''True when the file is missing or a rewrite is forced.'''
            return not os.path.exists(path) or self.force

        document = self.client.documents.get(document_cloud_id)
        metadata = self._get_meta_data(document)

        pdf_path = self._get_path(document_cloud_id, ".pdf")
        if must_write(pdf_path):
            pdf_content = document.pdf
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(pdf_content)

        metadata_path = self._get_path(document_cloud_id, ".txt")
        if must_write(metadata_path):
            with open(metadata_path, "wb") as metadata_file:
                metadata_file.write(json.dumps(metadata))

        full_text_path = self._get_path(document_cloud_id, "_text.txt")
        if must_write(full_text_path):
            with open(full_text_path, "wb") as full_text_file:
                full_text_file.write(json.dumps(document.full_text))
Ejemplo n.º 7
0
    def add_name(self, name):
        """
        Store ``name`` as a Person or a Company if it is not stored yet.

        Periods are removed and surrounding whitespace is trimmed first. The
        person check runs before the company check. For whichever check
        accepts the name: zero existing rows means a new row is added and
        committed; exactly one existing row means the session is closed and
        nothing is added. Every other outcome is logged as "Could not link"
        and the session is closed.

        :param name: The name to store.
        """
        name = name.replace(".", "").strip()

        if self.is_this_a_person(name):
            # People with Jr etc. at the end of the name are people.
            person_count = (SESSION.query(Person)
                            .filter(Person.name == name)
                            .count())

            if person_count == 0:
                SESSION.add(Person(name))
                SESSION.commit()
                return
            elif person_count == 1:
                SESSION.close()
                return

        if self._is_this_a_company(name):
            company_count = (SESSION.query(Company)
                             .filter(Company.name == name)
                             .count())

            if company_count == 0:
                SESSION.add(Company(name))
                SESSION.commit()
                return
            elif company_count == 1:
                SESSION.close()
                return

        log.info("Could not link %s", name)

        SESSION.close()
Ejemplo n.º 8
0
    def __init__(self, purchase_order_number):
        '''
        Build the purchase order's attributes from its saved HTML.

        When ``check_if_valid_purchase_order_format`` returns False the
        constructor stops early and only ``self.purchaseorder`` is set.
        Otherwise the purchase order HTML is read, the vendor profile is
        downloaded, and the description, vendor name, department, K number,
        attachments, data and title attributes are filled in.

        :param purchase_order_number: The purchase order number.
        '''
        self.purchaseorder = purchase_order_number

        is_valid = Utilities().check_if_valid_purchase_order_format(
            self.purchaseorder)
        if is_valid is False:
            log.debug('Purchase order %s is invalid', self.purchaseorder)
            return

        html = self._get_html()
        self.vendor_id_city = self._get_city_vendor_id(html)
        self._download_vendor_profile(self.vendor_id_city)

        soup = BeautifulSoup(html)
        self.description = self._get_description(soup)

        try:
            self.vendor_name = self._get_vendor_name()
        except IOError as error:
            # Fall back to a placeholder when the vendor name lookup fails
            # with an IOError.
            log.error(error, exc_info=True)
            self.vendor_name = "unknown"
            log.info('No vendor info for purchase order %s',
                     self.purchaseorder)

        self.department = self._get_department(soup)
        self.k_number = self._get_knumber(soup)
        self.attachments = self._get_attachments(soup)
        self.data = self._get_data()
        self.title = "%s : %s" % (self.vendor_name, self.description)
Ejemplo n.º 9
0
    def check_if_need_to_scrape(self, page):
        """
        Decide whether a purchasing-site page is due for another scrape.

        A page with no recorded scrape date is always due. Pages numbered 10
        or lower are due when they were last scraped before today; higher
        pages are due when they were last scraped more than seven days ago.

        :params page: The purchasing site page number to check.
        :type page: int.
        :returns: boolean. True if need to scrape, False if not.
        """
        today = date.today()
        week_ago = date.today() - timedelta(days=7)

        last_scraped = self._check_when_last_scraped(page)
        if last_scraped is None:
            return True  # No scrape on record, so scrape this page

        if page <= 10:
            is_stale = last_scraped < today
        elif page > 10:
            is_stale = last_scraped < week_ago
        else:
            # Only reachable when page compares as neither <= 10 nor > 10.
            return None

        if is_stale:
            return True  # Scrape this page

        log.info('Skipping page %d. It was scraped recently', page)

        return False
Ejemplo n.º 10
0
 def try_to_link(self, vendor_name):
     """
     Search for the vendor and process the result when it is unambiguous.

     Runs ``search_sos`` for ``vendor_name`` and, only when
     ``get_total_hits`` reports exactly one hit, passes the search results
     to ``process_direct_hit``. Any other hit count is ignored.

     :param vendor_name: The vendor name to search for.
     """
     results = self.search_sos(vendor_name)

     if self.get_total_hits(results) != 1:
         return  # Zero or several hits: nothing is processed.

     log.info("Perfect match for %s", vendor_name)
     self.process_direct_hit(results, vendor_name)
Ejemplo n.º 11
0
    def _backup(self, document_cloud_id):
        '''
        Write local backup files for one DocumentCloud document.

        Nothing is fetched unless ``_needs_to_be_backed_up`` says so or
        ``self.force`` is set. Otherwise the document is retrieved and up to
        three files are written at the paths given by ``_get_path``: the PDF
        (".pdf"), the metadata as JSON (".txt") and the full text as JSON
        ("_text.txt"). An existing file is only rewritten when
        ``self.force`` is set.

        :param document_cloud_id: The DocumentCloud ID of the document.
        '''
        if not (self._needs_to_be_backed_up(document_cloud_id) or self.force):
            log.info("%s is already is backed up", document_cloud_id)
            return

        log.info("Creating backup for %s", document_cloud_id)

        def must_write(path):
            '''True when the file is missing or a rewrite is forced.'''
            return not os.path.exists(path) or self.force

        document = self.client.documents.get(document_cloud_id)
        metadata = self._get_meta_data(document)

        pdf_path = self._get_path(document_cloud_id, ".pdf")
        if must_write(pdf_path):
            pdf_content = document.pdf
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(pdf_content)

        metadata_path = self._get_path(document_cloud_id, ".txt")
        if must_write(metadata_path):
            with open(metadata_path, "wb") as metadata_file:
                metadata_file.write(json.dumps(metadata))

        full_text_path = self._get_path(document_cloud_id, "_text.txt")
        if must_write(full_text_path):
            with open(full_text_path, "wb") as full_text_file:
                full_text_file.write(json.dumps(document.full_text))
Ejemplo n.º 12
0
    def __init__(self, purchase_order_number):
        '''
        Build the purchase order's attributes from its saved HTML.

        When ``check_if_valid_purchase_order_format`` returns False the
        constructor stops early and only ``self.purchaseorder`` is set.
        Otherwise the purchase order HTML is read, the vendor profile is
        downloaded, and the description, vendor name, department, K number,
        attachments, data and title attributes are filled in.

        :param purchase_order_number: The purchase order number.
        '''
        self.purchaseorder = purchase_order_number

        is_valid = Utilities().check_if_valid_purchase_order_format(
            self.purchaseorder)
        if is_valid is False:
            log.debug('Purchase order %s is invalid', self.purchaseorder)
            return

        html = self._get_html()
        self.vendor_id_city = self._get_city_vendor_id(html)
        self._download_vendor_profile(self.vendor_id_city)

        soup = BeautifulSoup(html)
        self.description = self._get_description(soup)

        try:
            self.vendor_name = self._get_vendor_name()
        except IOError as error:
            # Fall back to a placeholder when the vendor name lookup fails
            # with an IOError.
            log.error(error, exc_info=True)
            self.vendor_name = "unknown"
            log.info('No vendor info for purchase order %s',
                     self.purchaseorder)

        self.department = self._get_department(soup)
        self.k_number = self._get_knumber(soup)
        self.attachments = self._get_attachments(soup)
        self.data = self._get_data()
        self.title = "%s : %s" % (self.vendor_name, self.description)
Ejemplo n.º 13
0
    def _download_attachment(self, attachment):
        '''
        Download one attachment of a purchase order unless we already have it.

        The city's attachment ID is the first run of digits in the
        attachment's ``href``. The display name is looked up for every
        attachment; the file itself is only downloaded when
        ``<DOCUMENTS_DIR>/<id>.pdf`` does not exist yet.

        :param attachment: An object whose ``get('href')`` returns the
            attachment link (presumably a BeautifulSoup <a> tag -- confirm
            against the caller).
        '''
        # The city's purchasing site has an internal ID for each attachment.
        # It is used both to download the file and to name the local copy.
        href = attachment.get('href')
        city_attachment_id = re.search('[0-9]+', href).group()
        log.debug('Gathering data for attachment %s', city_attachment_id)

        document_path = '%s/%s.pdf' % (DOCUMENTS_DIR, city_attachment_id)

        display_name = self._get_attachment_display_name(city_attachment_id)

        if not os.path.isfile(document_path):
            self._download_attachment_file(
                city_attachment_id, display_name, document_path)
        else:  # Have already downloaded
            log.info('Already have PDF for attachment %s', city_attachment_id)
Ejemplo n.º 14
0
    def _get_attachments(soup):
        '''
        Find the attachment links in the contract page HTML.

        Walks from the last ".table-01" element down to the 17th row of its
        nested metadata table and collects the <a> elements in that row's
        second cell.

        :param soup: A BeautifulSoup object for the contract page's HTML.
        :type soup: BeautifulSoup object
        :returns: The <a> elements found, or an empty list when any step of
            the lookup raises IndexError.
        '''
        try:
            main_table = soup.select('.table-01').pop()

            third_row = main_table.findChildren(['tr'])[2]
            first_cell = third_row.findChildren(['td'])[0]
            inner_table = first_cell.findChildren(['table'])[0]
            metadata_rows = inner_table.findChildren(['tr'])

            attachment_cell = metadata_rows[16].findChildren(['td'])[1]

            return attachment_cell.findChildren(['a'])
        except IndexError:
            # The city does not always include attachment files.
            log.info('No attachments found')

            return []
Ejemplo n.º 15
0
    def _get_vendor_name(self):
        '''
        Read the vendor's name from the saved vendor profile HTML.

        Opens ``<VENDORS_DIR>/<self.vendor_id_city>.html``, finds the
        "Company Name:" label and takes the last content node of the sixth
        <td> found under that label's grandparent element.

        :returns: string. The contract vendor's name, in uppercase.
        :raises IOError: if the vendor HTML file cannot be opened.
        '''
        vendor_file_location = '%s/%s.html' % (VENDORS_DIR,
                                               self.vendor_id_city)

        with open(vendor_file_location, 'r') as vendor_file:
            log.info('Reading HTML for vendor %s', self.vendor_id_city)
            html = vendor_file.read()

        soup = BeautifulSoup(html)

        label = soup(text='Company Name:')[0]
        vendor_row = label.parent.parent
        name_cell = vendor_row.findChildren(['td'])[5]
        vendor_name = name_cell.contents.pop().strip()

        # Convert to uppercase for DocumentCloud project metadata.
        # Search queries are also converted to uppercase.
        return vendor_name.upper()
Ejemplo n.º 16
0
    def _get_attachment_display_name(self, city_attachment_id):
        '''
        Fetch an attachment's detail page and return its display name.

        Downloads the attachment detail page from the city's purchasing site,
        saves the raw HTML as ``<ATTACHMENTS_DIR>/<city_attachment_id>.html``
        (creating the directory when it is missing), and reads the name from
        the first ".sectionheader-01" element of the page.

        :param city_attachment_id: The attachment ID sent as ``fileNbr``.
        :returns: string. The header text with the
            "Attachment File Detail:" label removed.
        '''
        url_template = (
            'http://www.purchasing.cityofno.com/bso/external/document/'
            'attachments/attachmentFileDetail.sdo?'
            'fileNbr=%s'
            '&docId=%s'
            '&docType=P&releaseNbr=0&parentUrl=/external/purchaseorder/'
            'poSummary.sdo&external=true'
        )
        url = url_template % (city_attachment_id, self.purchaseorder)
        html = urllib2.urlopen(url).read()

        file_location = '%s/%s.html' % (ATTACHMENTS_DIR, city_attachment_id)
        attachments_dir = os.path.dirname(file_location)

        if not os.path.exists(attachments_dir):
            os.makedirs(attachments_dir)

        with open(file_location, 'w') as html_file:
            log.info('Saving HTML for attachment %s', city_attachment_id)
            html_file.write(html)

        soup = BeautifulSoup(html)
        raw_header = soup.select(".sectionheader-01")[0].contents.pop()

        # Collapse every run of whitespace into a single space.
        normalized_header = ' '.join(raw_header.split())

        return str(normalized_header).replace(
            "Attachment File Detail:", "").strip()
Ejemplo n.º 17
0
    def _check_if_need_to_download_contract(purchase_order_number):
        '''
        Bring one purchase order up to date in every place we store it.

        Four steps run in order: download the purchase order into the local
        file repository if needed, build a PurchaseOrder and download its
        attachments, add the contract to DocumentCloud if needed, and add it
        to the local database if it is missing. An HTTPError in the first,
        third or fourth step is logged and the remaining steps still run; an
        IndexError in the second step is logged and ends the function.

        :param purchase_order_number: The contract's purchase order number.
        :type purchase_order_number: string
        '''
        log.info('Checking purchase order %s', purchase_order_number)

        # Step 1: local file repository
        try:
            log.debug('LensRepository')

            if LensRepository(
                    purchase_order_number).check_if_need_to_download():
                LensRepository(purchase_order_number).download_purchase_order()
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 2: the purchase order and its attachments
        try:
            log.debug('PurchaseOrder')

            purchase_order = PurchaseOrder(purchase_order_number)
            purchase_order.download_attachments()
        except IndexError:
            # The later steps need the PurchaseOrder object, so stop here.
            log.exception(purchase_order_number)
            return

        # Step 3: DocumentCloud project
        try:
            log.debug('DocumentCloudProject')

            if DocumentCloudProject().check_if_need_to_upload(
                    purchase_order_number):
                DocumentCloudProject().prepare_then_add_contract(
                    purchase_order)
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 4: local database
        try:
            log.debug('LensDatabase')

            in_database = LensDatabase().check_if_database_has_contract(
                purchase_order_number)
            if in_database is False:
                LensDatabase().add_to_database(purchase_order)
        except urllib2.HTTPError:
            log.exception('Purchase order %s is not posted publically.',
                          purchase_order_number)
Ejemplo n.º 18
0
    def _check_if_need_to_download_contract(purchase_order_number):
        '''
        Bring one purchase order up to date in every place we store it.

        Four steps run in order: download the purchase order into the local
        file repository if needed, build a PurchaseOrder and download its
        attachments, add the contract to DocumentCloud if needed, and add it
        to the local database if it is missing. An HTTPError in the first,
        third or fourth step is logged and the remaining steps still run; an
        IndexError in the second step is logged and ends the function.

        :param purchase_order_number: The contract's purchase order number.
        :type purchase_order_number: string
        '''
        log.info('Checking purchase order %s', purchase_order_number)

        # Step 1: local file repository
        try:
            log.debug('LensRepository')

            if LensRepository(
                    purchase_order_number).check_if_need_to_download():
                LensRepository(purchase_order_number).download_purchase_order()
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 2: the purchase order and its attachments
        try:
            log.debug('PurchaseOrder')

            purchase_order = PurchaseOrder(purchase_order_number)
            purchase_order.download_attachments()
        except IndexError:
            # The later steps need the PurchaseOrder object, so stop here.
            log.exception(purchase_order_number)
            return

        # Step 3: DocumentCloud project
        try:
            log.debug('DocumentCloudProject')

            if DocumentCloudProject().check_if_need_to_upload(
                    purchase_order_number):
                DocumentCloudProject().prepare_then_add_contract(
                    purchase_order)
        except urllib2.HTTPError:
            log.exception('Purchase order %s not posted publically',
                          purchase_order_number)

        # Step 4: local database
        try:
            log.debug('LensDatabase')

            in_database = LensDatabase().check_if_database_has_contract(
                purchase_order_number)
            if in_database is False:
                LensDatabase().add_to_database(purchase_order)
        except urllib2.HTTPError:
            log.exception('Purchase order %s is not posted publically.',
                          purchase_order_number)
Ejemplo n.º 19
0
    def _get_html(self):
        '''
        Read the saved HTML file for this purchase order.

        The file is ``<PURCHASE_ORDER_DIR>/<self.purchaseorder>.html``.

        :returns: string. The HTML contents of this purchase order file.
        '''
        html_path = '%s/%s.html' % (PURCHASE_ORDER_DIR, self.purchaseorder)

        with open(html_path, 'r') as purchase_order_file:
            log.info('Reading HTML for purchase order %s', self.purchaseorder)
            html = purchase_order_file.read()

        return html
Ejemplo n.º 20
0
    def _get_html(self):
        '''
        Read the saved HTML file for this purchase order.

        The file is ``<PURCHASE_ORDER_DIR>/<self.purchaseorder>.html``.

        :returns: string. The HTML contents of this purchase order file.
        '''
        html_path = '%s/%s.html' % (PURCHASE_ORDER_DIR, self.purchaseorder)

        with open(html_path, 'r') as purchase_order_file:
            log.info('Reading HTML for purchase order %s', self.purchaseorder)
            html = purchase_order_file.read()

        return html
Ejemplo n.º 21
0
    def link(self, name, vendor):
        """
        Link the vendor to the person or company called ``name``.

        ``name`` is normalized (newlines and periods removed, whitespace
        trimmed) and looked up in both the Person and Company tables. A link
        row is created only when the vendor exists, the name is found in
        exactly one of the two tables, and no such link is stored yet:

        * person only: a ``VendorOfficer`` row is added and committed.
        * company only: a ``VendorOfficerCompany`` row is added and committed.

        In every other case nothing is written and the session is closed.

        :param name: The officer's name (person or company).
        :type name: string
        :param vendor: The name of the vendor to link the officer to.
        """
        name = name.strip("\n").replace(".", "").strip()

        vendorindb = (SESSION.query(Vendor)
                      .filter(Vendor.name == vendor)
                      .first())

        personindb = (SESSION.query(Person)
                      .filter(Person.name == name)
                      .first())

        companyindb = (SESSION.query(Company)
                       .filter(Company.name == name)
                       .first())

        # Check the vendor before touching vendorindb.id: an unknown vendor
        # used to raise AttributeError inside the link-count queries.
        if vendorindb is not None:
            if personindb is not None and companyindb is None:
                existing_links = (
                    SESSION.query(VendorOfficer)
                    .filter(VendorOfficer.vendorid == vendorindb.id)
                    .filter(VendorOfficer.personid == personindb.id)
                    .count())

                if existing_links < 1:
                    # logging merges args with %-formatting, so the message
                    # must use %s placeholders rather than {0}/{1}.
                    log.info("Linking %s to %s",
                             vendorindb.id, personindb.id)
                    SESSION.add(VendorOfficer(vendorindb.id, personindb.id))
                    SESSION.commit()
                    return

            elif companyindb is not None and personindb is None:
                existing_links = (
                    SESSION.query(VendorOfficerCompany)
                    .filter(VendorOfficerCompany.vendorid == vendorindb.id)
                    .filter(
                        VendorOfficerCompany.companiesid == companyindb.id)
                    .count())

                if existing_links < 1:
                    # Logged (not printed) to match the person branch.
                    log.info("Linking %s to %s",
                             vendorindb.id, companyindb.id)
                    SESSION.add(
                        VendorOfficerCompany(vendorindb.id, companyindb.id))
                    SESSION.commit()
                    return

        SESSION.close()
Ejemplo n.º 22
0
    def _check_if_contract_number_is_null(purchase_order_object):
        '''
        Report whether the purchase order has an empty contract number.

        The contract number is ``data['contract number']``; it counts as null
        when its length is zero. A null contract number is logged together
        with ``data['purchase order']``.

        :params purchase_order_object: A PurchaseOrder object.
        :type purchase_order_object: A PurchaseOrder object.
        :returns: boolean. True if the contract number is null, False if not.
        '''
        contract_number = purchase_order_object.data['contract number']

        if len(contract_number) >= 1:
            return False

        log.info('Not uploading purchase order %s to DocumentCloud',
                 purchase_order_object.data['purchase order'])
        log.info('Contract number %s is null', contract_number)

        return True
    def match_local_database_to_document_cloud(self):
        '''
        Sync every half-filled local contract with its DocumentCloud document.

        Each contract returned by ``get_half_filled_contracts`` is fetched
        from DocumentCloud by its ``doc_cloud_id`` and passed to
        ``_match_contract``. A failure on one contract is logged and the
        loop moves on to the next.

        TODO: Why fetching half-filled contracts?
        '''
        pending_contracts = LensDatabase().get_half_filled_contracts()
        log.info('%d half-filled contracts need to be synced',
                 len(pending_contracts))

        for pending in pending_contracts:
            try:
                document = self.client.documents.get(pending.doc_cloud_id)
                self._match_contract(document)
            except Exception as error:
                log.error(error, exc_info=True)
Ejemplo n.º 24
0
    def link(self, name, vendor):
        """
        Link the vendor to the person or company called ``name``.

        ``name`` is normalized (newlines and periods removed, whitespace
        trimmed) and looked up in both the Person and Company tables. A link
        row is created only when the vendor exists, the name is found in
        exactly one of the two tables, and no such link is stored yet:

        * person only: a ``VendorOfficer`` row is added and committed.
        * company only: a ``VendorOfficerCompany`` row is added and committed.

        In every other case nothing is written and the session is closed.

        :param name: The officer's name (person or company).
        :type name: string
        :param vendor: The name of the vendor to link the officer to.
        """
        name = name.strip("\n").replace(".", "").strip()

        vendorindb = (SESSION.query(Vendor)
                      .filter(Vendor.name == vendor)
                      .first())

        personindb = (SESSION.query(Person)
                      .filter(Person.name == name)
                      .first())

        companyindb = (SESSION.query(Company)
                       .filter(Company.name == name)
                       .first())

        # Check the vendor before touching vendorindb.id: an unknown vendor
        # used to raise AttributeError inside the link-count queries.
        if vendorindb is not None:
            if personindb is not None and companyindb is None:
                existing_links = (
                    SESSION.query(VendorOfficer)
                    .filter(VendorOfficer.vendorid == vendorindb.id)
                    .filter(VendorOfficer.personid == personindb.id)
                    .count())

                if existing_links < 1:
                    # logging merges args with %-formatting, so the message
                    # must use %s placeholders rather than {0}/{1}.
                    log.info("Linking %s to %s",
                             vendorindb.id, personindb.id)
                    SESSION.add(VendorOfficer(vendorindb.id, personindb.id))
                    SESSION.commit()
                    return

            elif companyindb is not None and personindb is None:
                existing_links = (
                    SESSION.query(VendorOfficerCompany)
                    .filter(VendorOfficerCompany.vendorid == vendorindb.id)
                    .filter(
                        VendorOfficerCompany.companiesid == companyindb.id)
                    .count())

                if existing_links < 1:
                    # Logged (not printed) to match the person branch.
                    log.info("Linking %s to %s",
                             vendorindb.id, companyindb.id)
                    SESSION.add(
                        VendorOfficerCompany(vendorindb.id, companyindb.id))
                    SESSION.commit()
                    return

        SESSION.close()
Ejemplo n.º 25
0
    def _write_purchase_order(self, html, file_location):
        '''
        Save an individual contract page's HTML to ``file_location``.

        The parent directory is created first when it does not exist.

        :param html: The individual contract page's HTML.
        :type html: string.
        :param file_location: The path to where the file should be created.
        :type file_location: string.
        '''
        target_dir = os.path.dirname(file_location)

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        with open(file_location, 'w') as html_file:
            log.info('Saving HTML for purchase order %s',
                     self.purchase_order_number)

            html_file.write(html)
Ejemplo n.º 26
0
    def _write_purchase_order(self, html, file_location):
        '''
        Save an individual contract page's HTML to ``file_location``.

        The parent directory is created first when it does not exist.

        :param html: The individual contract page's HTML.
        :type html: string.
        :param file_location: The path to where the file should be created.
        :type file_location: string.
        '''
        target_dir = os.path.dirname(file_location)

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        with open(file_location, 'w') as html_file:
            log.info('Saving HTML for purchase order %s',
                     self.purchase_order_number)

            html_file.write(html)
    def match_local_database_to_document_cloud(self):
        '''
        Sync every half-filled local contract with its DocumentCloud document.

        Each contract returned by ``get_half_filled_contracts`` is fetched
        from DocumentCloud by its ``doc_cloud_id`` and passed to
        ``_match_contract``. A failure on one contract is logged and the
        loop moves on to the next.

        TODO: Why fetching half-filled contracts?
        '''
        pending_contracts = LensDatabase().get_half_filled_contracts()
        log.info('%d half-filled contracts need to be synced',
                 len(pending_contracts))

        for pending in pending_contracts:
            try:
                document = self.client.documents.get(pending.doc_cloud_id)
                self._match_contract(document)
            except Exception as error:
                log.error(error, exc_info=True)
Ejemplo n.º 28
0
    def get_people_associated_with_vendor(self, name):
        """
        Get a list of people associated with the vendor.

        A person counts as associated when a VendorOfficer row joins that
        person's ID to the ID of the vendor called ``name``.

        Not called on by this class, but is called on by emailer.py.

        :param name: The vendor name.
        :type name: string
        :returns: list. The names of the people linked to this vendor.
        """
        rows = (
            SESSION.query(Person.name)
            .filter(Vendor.id == VendorOfficer.vendorid)
            .filter(Person.id == VendorOfficer.personid)
            .filter(Vendor.name == name)
            .all()
        )

        SESSION.close()

        log.info('%d people associated with %s', len(rows), name)

        # Each row holds a single column: the person's name.
        people = []
        for row in rows:
            people.append(str(row[0]))

        return people
Ejemplo n.º 29
0
    def check_if_database_has_contract(self, purchase_order_number):
        """
        Check if local database already has this contract.

        Counts the Contract rows whose ``purchaseordernumber`` equals
        ``purchase_order_number`` and closes the session afterwards.

        :param purchase_order_number: The unique ID in the city's website.
        :type purchase_order_number: string
        :returns: boolean. True if the contract is present, False if not.
        """
        matches = (
            SESSION.query(Contract)
            .filter(Contract.purchaseordernumber == purchase_order_number)
            .count()
        )

        SESSION.close()

        # Any positive count means the contract is present. Testing for
        # exactly one row would report duplicated contracts as missing.
        if matches > 0:
            log.info('DB contracts table already has purchase order %s',
                     purchase_order_number)
            return True

        log.info('DB contracts table does not have purchase order %s',
                 purchase_order_number)
        return False
Ejemplo n.º 30
0
    def _download_vendor_profile(city_vendor_id):
        '''
        Download the vendor page associated with a purchase order, if we don't
        have the vendor page already.

        The page is saved as ``<VENDORS_DIR>/<city_vendor_id>.html``; the
        directory is created when it is missing. An HTTPError during the
        download is logged and nothing is saved.

        :param city_vendor_id: The vendor ID on the city's purchasing site.
        :type city_vendor_id: string.
        '''
        vendor_file_location = '%s/%s.html' % (VENDORS_DIR, city_vendor_id)

        if os.path.isfile(vendor_file_location):
            log.info('Already have HTML for vendor %s', city_vendor_id)
            return

        try:
            url = (
                'http://www.purchasing.cityofno.com/'
                'bso/external/vendor/vendorProfileOrgInfo.sdo?'
                'external=true&vendorId={}'
            ).format(city_vendor_id)

            html = urllib2.urlopen(url).read()

            vendors_dir = os.path.dirname(vendor_file_location)
            if not os.path.exists(vendors_dir):
                os.makedirs(vendors_dir)

            with open(vendor_file_location, 'w') as vendor_file:
                log.info('Saving HTML for vendor %s', city_vendor_id)
                vendor_file.write(html)
        except urllib2.HTTPError:
            log.info('Could not save HTML for vendor %s', city_vendor_id)
Ejemplo n.º 31
0
    def _download_vendor_profile(city_vendor_id):
        '''
        Download the vendor page associated with a purchase order, if we don't
        have the vendor page already.

        The page is written to ``<VENDORS_DIR>/<city_vendor_id>.html``; the
        directory is created first when it does not exist. An HTTP error
        raised while downloading is logged and otherwise ignored.

        :param city_vendor_id: The vendor ID on the city's purchasing site.
        :type city_vendor_id: string.
        '''
        vendor_file_location = '%s/%s.html' % (VENDORS_DIR, city_vendor_id)

        if os.path.isfile(vendor_file_location):
            log.info('Already have HTML for vendor %s', city_vendor_id)
            return

        try:
            response = urllib2.urlopen(
                'http://www.purchasing.cityofno.com/' +
                'bso/external/vendor/vendorProfileOrgInfo.sdo?' +
                'external=true&vendorId={}'.format(city_vendor_id))

            try:
                html = response.read()
            finally:
                # Always release the connection, even if reading fails.
                response.close()

            vendor_dir = os.path.dirname(vendor_file_location)
            if not os.path.exists(vendor_dir):
                os.makedirs(vendor_dir)

            with open(vendor_file_location, 'w') as vendor_file:
                log.info('Saving HTML for vendor %s', city_vendor_id)
                vendor_file.write(html)
        except urllib2.HTTPError:
            log.info('Could not save HTML for vendor %s', city_vendor_id)
Ejemplo n.º 32
0
    def _get_attachments(soup):
        '''
        Locate the attachment links inside the contract page HTML.

        Walks from the last ``.table-01`` element down to the nested metadata
        table and collects the ``a`` elements found in the second cell of its
        17th row.

        :param soup: A BeautifulSoup object for the contract page's HTML.
        :type soup: BeautifulSoup object
        :returns: The ``a`` elements found in that cell, or an empty list \
        when any step of the walk hits a missing element (IndexError).
        '''

        try:
            outer_table = soup.select('.table-01').pop()

            # Third row -> first cell -> first nested table holds the metadata.
            third_row = outer_table.findChildren(['tr'])[2]
            first_cell = third_row.findChildren(['td'])[0]
            nested_table = first_cell.findChildren(['table'])[0]
            metadata_rows = nested_table.findChildren(['tr'])

            attachment_cell = metadata_rows[16].findChildren(['td'])[1]
            attachment_links = attachment_cell.findChildren(['a'])
        except IndexError:
            # The city does not always include attachment files.
            log.info('No attachments found')
            return []

        return attachment_links
Ejemplo n.º 33
0
    def _download_attachment(self, attachment):
        '''
        Download an attachment associated with a purchase order.

        The PDF is stored as ``<DOCUMENTS_DIR>/<city attachment ID>.pdf``. The
        display name is only looked up when that file is not on disk yet, so
        an attachment that was already downloaded triggers no extra lookup.

        :param attachment: The attachment link; its ``href`` must contain \
        the city's numeric attachment ID.
        :type attachment: object exposing ``get('href')`` (presumably a \
        BeautifulSoup tag -- confirm against the caller)
        '''

        # The city's purchasing site has an internal ID for each attachment.
        # Here we use it to download the attachment files, and also to store
        # locally so we can have a list of the attachments we have on hand.
        city_attachment_id = re.search('[0-9]+',
                                       attachment.get('href')).group()
        log.debug('Gathering data for attachment %s', city_attachment_id)

        document_path = '%s/%s.pdf' % (DOCUMENTS_DIR, city_attachment_id)

        if os.path.isfile(document_path):  # Have already downloaded
            log.info('Already have PDF for attachment %s', city_attachment_id)
            return

        # Only needed for the download itself, so resolve it as late as
        # possible.
        display_name = self._get_attachment_display_name(city_attachment_id)
        self._download_attachment_file(city_attachment_id, display_name,
                                       document_path)
Ejemplo n.º 34
0
    def add_name(self, name):
        """
        Store a name as a Person or a Company if it is not stored yet.

        Periods are removed and surrounding whitespace is stripped before the
        name is classified. The person check runs first; the company check is
        only consulted when the name is not recognised as a person. A name
        that matches neither check is logged as unlinkable.

        The session is closed on every path, including after a successful
        insert and when a query or commit raises.

        :param name: The raw name to store.
        :type name: string
        """
        name = name.replace(".", "").strip()

        # Checked in order; the first classifier that accepts the name wins.
        # People with Jr etc. at the end of the name are people.
        candidates = (
            (self.is_this_a_person, Person),
            (self._is_this_a_company, Company),
        )

        try:
            for is_match, model in candidates:
                if not is_match(name):
                    continue

                already_stored = (SESSION.query(model)
                                  .filter(model.name == name)
                                  .count())

                # Any existing row (even duplicates) means there is nothing
                # to add.
                if not already_stored:
                    SESSION.add(model(name))
                    SESSION.commit()

                return

            log.info("Could not link %s", name)
        finally:
            SESSION.close()
Ejemplo n.º 35
0
        url = 'http://www.purchasing.cityofno.com/bso/' + \
            'external/advsearch/searchContract.sdo'

        req = urllib2.Request(url=url, data=data)
        req.add_header('Pragma', ' no-cache')
        req.add_header('Origin', 'http://www.purchasing.cityofno.com')
        req.add_header('Accept-Encoding', 'gzip, deflate')
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        req.add_header(
            'Accept',
            'text/add_contracthtml,application/xhtml+xml,application/xml;' +
            'q=0.9,image/webp,*/*;q=0.8')
        req.add_header('Cache-Control', 'no-cache')
        req.add_header(
            'Referer',
            'http://www.purchasing.cityofno.com/bso/external/advsearch/' +
            'searchContract.sdo')
        req.add_header('Connection', 'keep-alive')
        req.add_header('DNT', '1')

        response = urllib2.urlopen(req)
        output = response.read()
        response.close()

        return output


# Script entry point: when this module is executed directly, log the start of
# the run and have a new CheckCity instance check the pages.
if __name__ == '__main__':
    log.info("Checking the city's purchasing site for new contracts")
    CheckCity().check_pages()
Ejemplo n.º 36
0
            'external/advsearch/searchContract.sdo'

        req = urllib2.Request(url=url, data=data)
        req.add_header('Pragma', ' no-cache')
        req.add_header('Origin', 'http://www.purchasing.cityofno.com')
        req.add_header('Accept-Encoding', 'gzip, deflate')
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        req.add_header(
            'Accept',
            'text/add_contracthtml,application/xhtml+xml,application/xml;' +
            'q=0.9,image/webp,*/*;q=0.8'
        )
        req.add_header('Cache-Control', 'no-cache')
        req.add_header(
            'Referer',
            'http://www.purchasing.cityofno.com/bso/external/advsearch/' +
            'searchContract.sdo'
        )
        req.add_header('Connection', 'keep-alive')
        req.add_header('DNT', '1')

        response = urllib2.urlopen(req)
        output = response.read()
        response.close()

        return output

# Script entry point: when this module is executed directly, log the start of
# the run and have a new CheckCity instance check the pages.
if __name__ == '__main__':
    log.info("Checking the city's purchasing site for new contracts")
    CheckCity().check_pages()