def get_contracts(self, offset=0, limit=None):
    """
    Fetch contracts from the database, most recently added first.

    The page offset is multiplied by ``self.pagelength`` to obtain the row
    offset used in the query.

    :param offset: The number of pages to skip.
    :type offset: int
    :param limit: The number of records to return.
    :type limit: int
    :returns: The matching contracts after being passed through
        ``self.translate_to_doc_cloud_form``.
    """
    row_offset = offset * self.pagelength

    newest_first = SESSION.query(Contract).order_by(
        Contract.dateadded.desc())
    rows = newest_first.offset(row_offset).limit(limit).all()
    SESSION.close()

    contracts = self.translate_to_doc_cloud_form(rows)
    log.debug('Contracts: %s', contracts)

    return contracts
def _update_contract_from_document_cloud(self, document_cloud_id, fields):
    """
    Overwrite a contract's metadata columns and commit the change.

    The contract is the first row whose ``doc_cloud_id`` equals
    ``document_cloud_id``.

    :param document_cloud_id: The unique ID in DocumentCloud.
    :type document_cloud_id: string
    :param fields: Metadata values. Must contain the keys 'contractno',
        'vendor', 'department', 'dateadded', 'title', 'purchaseno' and
        'description'.
    :type fields: dict
    """
    log.debug('Updating contract in database that has DocumentCloud ID %s',
              document_cloud_id)

    matching = SESSION.query(Contract).filter(
        Contract.doc_cloud_id == document_cloud_id)
    contract = matching.first()

    # NOTE(review): .first() yields None when nothing matches, in which case
    # the assignments below raise AttributeError -- confirm callers only pass
    # IDs that are already stored.
    column_sources = (
        ('contractnumber', 'contractno'),
        ('vendorid', 'vendor'),
        ('departmentid', 'department'),
        ('dateadded', 'dateadded'),
        ('title', 'title'),
        ('purchaseordernumber', 'purchaseno'),
        ('description', 'description'),
    )
    for column, key in column_sources:
        setattr(contract, column, fields[key])

    SESSION.add(contract)
    SESSION.commit()
def get_officers(self, vendor=None):
    """
    Get officer names, optionally restricted to a single vendor.

    :param vendor: The vendor to check on. Every occurrence of the text
        "vendor:" is removed before matching. When None, officers linked to
        any vendor are returned.
    :type vendor: string
    :returns: list. Sorted, de-duplicated officer names.
    """
    if vendor is None:
        query = (SESSION.query(VendorOfficer, Person)
                 .filter(VendorOfficer.personid == Person.id)
                 .order_by(Person.name))
    else:
        vendor = vendor.replace("vendor:", "")
        query = (SESSION.query(VendorOfficer, Person, Vendor)
                 .filter(VendorOfficer.personid == Person.id)
                 .filter(VendorOfficer.vendorid == Vendor.id)
                 .filter(Vendor.name == vendor))

    # Fetch the rows in both branches *before* closing the session, so the
    # query never executes against a session that was already closed.
    rows = query.all()
    SESSION.close()

    # Index 1 of each row is the Person entity in both query shapes.
    return sorted({row[1].name for row in rows})
def translate_officer_to_vendor(self, officer_term):
    """
    Translate an officer search term into the name of an associated vendor.

    Double quotes and the text "officers:" are removed from the term and
    surrounding whitespace is stripped before the lookup.

    :param officer_term: The name of the officer.
    :type officer_term: string
    :returns: string. The name of the vendor taken from the last matching
        (person, officer link, vendor) row.
    :raises IndexError: If no vendor is linked to the officer.
    """
    officer_term = (officer_term
                    .replace('"', "")
                    .replace("officers:", "")
                    .strip())

    try:
        results = (SESSION.query(Person, VendorOfficer, Vendor)
                   .filter(Person.name == officer_term)
                   .filter(Person.id == VendorOfficer.personid)
                   .filter(VendorOfficer.vendorid == Vendor.id)
                   .all())

        # pop() takes the last row; index 2 of the row is the Vendor entity.
        output = results.pop()[2].name
    finally:
        # Close the session even when the lookup finds nothing and pop()
        # raises, so the session is not left open on the error path.
        SESSION.close()

    log.info("Translating %s to %s", officer_term, output)

    return output
def _add_contract_to_local_database(self, contract):
    """
    Persist a contract by adding it to the session and committing.

    :param contract: The contract object to store in the local database.
    """
    SESSION.add(contract)
    SESSION.commit()
def get_names_from_vendor(self, name):
    """
    List the names of people linked to a vendor through VendorOfficer.

    :param name: The vendor name to match exactly.
    :type name: string
    :returns: list. One string per matching row.
    """
    people = SESSION.query(Person.name)
    people = people.filter(Vendor.id == VendorOfficer.vendorid)
    people = people.filter(Person.id == VendorOfficer.personid)
    people = people.filter(Vendor.name == name)
    rows = people.all()

    SESSION.close()

    names = []
    for row in rows:
        names.append(str(row[0]))
    return names
def get_daily_contracts(self, today_string=TODAY_DATE):
    """
    Get the contracts added on a given date, along with their vendors.

    :param today_string: The value compared against ``Contract.dateadded``.
        Defaults to the module-level ``TODAY_DATE``.
    :returns: list. Rows of (DocumentCloud ID, vendor name).
    """
    added_that_day = SESSION.query(Contract.doc_cloud_id, Vendor.name)
    added_that_day = added_that_day.filter(
        Contract.dateadded == today_string)
    added_that_day = added_that_day.filter(Contract.vendorid == Vendor.id)

    rows = added_that_day.all()
    SESSION.close()

    return rows
def get_names_from_vendor(self, name):
    """
    Return the names of the people recorded as officers of a vendor.

    :param name: The exact vendor name to look up.
    :type name: string
    :returns: list. The matching person names, each converted with str().
    """
    officer_names = SESSION.query(Person.name).filter(
        Vendor.id == VendorOfficer.vendorid,
        Person.id == VendorOfficer.personid,
        Vendor.name == name,
    )
    rows = officer_names.all()
    SESSION.close()

    return list(map(lambda row: str(row[0]), rows))
def _add_department(self, department):
    """
    Create a Department row for the given name and commit it.

    :param department: The department to add to the local database.
    :type department: string
    """
    log.debug('Adding department "%s" to database', department)

    new_row = Department(department)
    SESSION.add(new_row)
    SESSION.commit()
def get_daily_contracts(self, today_string=TODAY_DATE):
    """
    Fetch (DocumentCloud ID, vendor name) rows for one day's contracts.

    :param today_string: The date value that ``Contract.dateadded`` must
        equal. Defaults to the module-level ``TODAY_DATE``.
    :returns: list. The query result rows.
    """
    rows = SESSION.query(Contract.doc_cloud_id, Vendor.name).filter(
        Contract.dateadded == today_string,
        Contract.vendorid == Vendor.id,
    ).all()

    SESSION.close()

    return rows
def _add_vendor(self, vendor, vendor_id_city=None):
    """
    Create a Vendor row and commit it to the local database.

    :param vendor: The vendor to add to our database.
    :type vendor: string
    :param vendor_id_city: Passed as the second argument to the Vendor
        constructor. Defaults to None.
    """
    log.debug('Adding vendor "%s" to database', vendor)

    new_row = Vendor(vendor, vendor_id_city)
    SESSION.add(new_row)
    SESSION.commit()
def _get_officers(self):
    """
    Return a list of all company officers in the database.

    :returns: list. The name of every Person row in our database.
    """
    # TODO: Test that this works correctly before using.
    query = SESSION.query(Person.name).all()
    SESSION.close()

    officers = [row.name for row in query]

    # Hand the list back to the caller; without this the method returned
    # None despite documenting a list result.
    return officers
def _get_contract(self, purchase_order_number):
    """
    Look up a contract by its purchase order number.

    :param purchase_order_number: The unique ID in the city's website.
    :type purchase_order_number: string
    :returns: The first matching Contract row, or None when nothing matches.
    """
    matching = SESSION.query(Contract).filter(
        Contract.purchaseordernumber == purchase_order_number)
    contract = matching.first()

    SESSION.close()

    return contract
def get_all_contract_ids(self):
    """
    List the DocumentCloud ID of every contract, newest first.

    Not called on by this class, but is called on by backup.py.

    :returns: list. The ``doc_cloud_id`` of every contract row, ordered by
        ``dateadded`` descending.
    """
    newest_first = SESSION.query(Contract.doc_cloud_id).order_by(
        desc(Contract.dateadded))
    rows = newest_first.all()
    SESSION.close()

    ids = []
    for row in rows:
        ids.append(row[0])
    return ids
def get_contracts_count(self):
    """
    Count every contract row in the database.

    :returns: int. The total number of contracts in the database.
    """
    all_contracts = SESSION.query(Contract)
    total = all_contracts.count()

    SESSION.close()

    return total
def _get_database_vendor_id(self, vendor):
    """
    Look up the database ID of the first vendor with the given name.

    :param vendor: The vendor name.
    :type vendor: string
    :returns: The ``id`` attribute of the matching Vendor row.
    """
    log.debug('Fetching database ID for vendor "%s"', vendor)

    by_name = SESSION.query(Vendor).filter(Vendor.name == vendor)
    vendor_row = by_name.first()
    SESSION.close()

    # NOTE(review): vendor_row is None when no vendor matches, which makes
    # the attribute access below raise AttributeError.
    return vendor_row.id
def _get_department_id(self, department):
    """
    Look up the database ID of the first department with the given name.

    :param department: The department name.
    :type department: string
    :returns: The ``id`` attribute of the matching Department row.
    """
    log.debug('Finding ID for department "%s" in database', department)

    by_name = SESSION.query(Department).filter(
        Department.name == department)
    department_row = by_name.first()
    SESSION.close()

    # NOTE(review): department_row is None when no department matches, which
    # makes the attribute access below raise AttributeError.
    return department_row.id
def get_vendors(self):
    """
    Query all vendors in the database that are linked to a contract.

    :returns: list. Distinct vendor names ordered by name, each with
        surrounding whitespace stripped.
    """
    # Fetch the rows with .all() *before* closing the session; iterating the
    # lazy query afterwards would run it against a session that was already
    # closed. This mirrors get_departments().
    vendors = (SESSION.query(Vendor.name)
               .filter(Vendor.id == Contract.vendorid)
               .distinct()
               .order_by(Vendor.name)
               .all())
    SESSION.close()

    return [vendor[0].strip() for vendor in vendors]
def get_departments(self):
    """
    List every distinct department name, ordered by name.

    :returns: list. Department names with surrounding whitespace stripped.
    """
    distinct_names = SESSION.query(Department.name).distinct()
    rows = distinct_names.order_by(Department.name).all()

    SESSION.close()

    names = []
    for row in rows:
        names.append(row[0].strip())
    return names
def get_daily_contracts(self):
    """
    Get the contracts whose ``dateadded`` equals ``TODAY_DATE``.

    Not called on by this class, but is called on by emailer.py.

    :returns: list. Rows of (DocumentCloud ID, vendor name).
    """
    todays = SESSION.query(Contract.doc_cloud_id, Vendor.name)
    todays = todays.filter(Contract.dateadded == TODAY_DATE)
    todays = todays.filter(Contract.vendorid == Vendor.id)

    rows = todays.all()
    SESSION.close()

    return rows
def _check_if_vendor_exists(self, vendor):
    """
    Report whether a vendor with this exact name is in the database.

    :param vendor: The vendor name to check for.
    :type vendor: string
    :returns: boolean. True if vendor exists in database, False if not.
    """
    matches = SESSION.query(Vendor).filter(Vendor.name == vendor).count()
    SESSION.close()

    if matches != 0:
        return True

    log.debug('Vendor "%s" is missing from database', vendor)
    return False
def _get_contract_doc_cloud_id(self, document_cloud_id):
    """
    Look up a contract in the database by its DocumentCloud ID.

    :param document_cloud_id: The unique ID in the DocumentCloud project.
    :type document_cloud_id: string
    :returns: The first matching Contract row, or None when nothing matches.
    """
    log.debug('Find contract in database that has DocumentCloud ID %s',
              document_cloud_id)

    matching = SESSION.query(Contract).filter(
        Contract.doc_cloud_id == document_cloud_id)
    contract = matching.first()

    SESSION.close()

    return contract
def _check_if_department_exists(self, department):
    """
    Report whether a department with this exact name is in the database.

    :param department: The department name to check for.
    :returns: boolean. True if it exists in the database, False if not.
    """
    by_name = SESSION.query(Department).filter(
        Department.name == department)
    matches = by_name.count()
    SESSION.close()

    if matches != 0:
        return True

    log.debug('Department "%s" is missing from database', department)
    return False
def get_people_associated_with_vendor(self, name):
    """
    List the people linked to a vendor through the VendorOfficer table.

    Not called on by this class, but is called on by emailer.py.

    :param name: The vendor name.
    :type name: string
    :returns: list. The name of each linked person, converted with str().
    """
    linked_people = SESSION.query(Person.name).filter(
        Vendor.id == VendorOfficer.vendorid,
        Person.id == VendorOfficer.personid,
        Vendor.name == name,
    )
    rows = linked_people.all()
    SESSION.close()

    log.info('%d people associated with %s', len(rows), name)

    people = []
    for row in rows:
        people.append(str(row[0]))
    return people
def get_half_filled_contracts(self):
    """
    Get the contracts that have no department ID yet.

    A half-filled contract is one where we know the DocumentCloud ID but
    not the purchase order number or other metadata from the city's
    purchase order system, because DocumentCloud doesn't give immediate
    access to all document properties at upload time. This pulls out the
    contracts that still need to have their details filled in.

    Called on by sync_local_database_document_cloud.py.

    :returns: list. The Contract rows whose ``departmentid`` is NULL.
    """
    # Use the column's .is_(None) operator so SQLAlchemy emits "IS NULL".
    # A plain Python ``is None`` is evaluated immediately to False and never
    # reaches the database as a NULL test.
    query = (SESSION.query(Contract)
             .filter(Contract.departmentid.is_(None))
             .all())
    SESSION.close()

    return query
def check_if_database_has_contract(self, purchase_order_number):
    """
    Check if local database already has this contract.

    :param purchase_order_number: The unique ID in the city's website.
    :type purchase_order_number: string
    :returns: boolean. True if the contract is present, False if not.
    """
    count = (SESSION.query(Contract)
             .filter(Contract.purchaseordernumber == purchase_order_number)
             .count())
    SESSION.close()

    # Any positive count means the contract is present. Testing for exactly
    # one row would report "missing" if duplicate rows ever exist.
    if count > 0:
        log.info('DB contracts table already has purchase order %s',
                 purchase_order_number)
        return True

    log.info('DB contracts table does not have purchase order %s',
             purchase_order_number)
    return False
def _check_when_last_scraped(self, page):
    """
    Look up this page in scrape_log table to see when it was last scraped.

    :param page: The purchasing site's page to check.
    :type page: int
    :returns: date. When this page was last scraped. None if never.
    """
    query = SESSION.query(ScrapeLog).filter(ScrapeLog.page == page).all()

    # Close before branching so the "no row" early return below does not
    # leave the session open.
    SESSION.close()

    if not query:
        # No row yet for this page (total number varies)
        return None

    # The last row returned supplies the date.
    date_last_scraped = query.pop().last_scraped
    log.debug('This page was last scraped %s',
              date_last_scraped.strftime('%Y-%m-%d'))

    return date_last_scraped
def add_vendor(self, vendor_name):
    """
    Add a vendor row unless one with this exact name already exists.

    :param vendor_name: The vendor name. Periods are removed from the name
        that gets stored.
    :type vendor_name: string
    """
    already_stored = SESSION.query(Vendor).filter(
        Vendor.name == vendor_name).count()

    if already_stored != 0:
        SESSION.close()
        return

    # NOTE(review): the lookup above uses the raw name while the stored name
    # has periods stripped -- confirm this mismatch is intended.
    SESSION.add(Vendor(vendor_name.replace(".", "")))
    SESSION.commit()
def add_vendor(self, vendor_name):
    """
    Store a new vendor when no vendor row has exactly this name.

    :param vendor_name: The vendor name. The stored name has every period
        removed.
    :type vendor_name: string
    """
    by_name = SESSION.query(Vendor).filter(Vendor.name == vendor_name)
    is_new = by_name.count() == 0

    if not is_new:
        SESSION.close()
        return

    # NOTE(review): existence is checked with the raw name, but the row is
    # saved with periods stripped -- verify against callers.
    stripped_name = vendor_name.replace(".", "")
    SESSION.add(Vendor(stripped_name))
    SESSION.commit()
def update_scrape_log(self, page):
    """
    Record ``TODAY_DATE`` as the last-scraped date for a page.

    Inserts a ScrapeLog row when the page has none, otherwise updates the
    existing row. The change is committed in both cases.

    :param page: The page whose scrape date is being recorded.
    """
    existing = SESSION.query(ScrapeLog).filter(ScrapeLog.page == page).all()

    if existing:
        # Update this page in the database. .one() raises unless exactly
        # one row matches.
        entry = SESSION.query(ScrapeLog).filter(
            ScrapeLog.page == page).one()
        entry.last_scraped = TODAY_DATE
    else:
        # No row yet for this page (total number varies), so add one.
        SESSION.add(ScrapeLog(page, TODAY_DATE))

    SESSION.commit()
def add_name(self, name):
    """
    Store a name as a Person or a Company if it is not already stored.

    Periods are removed and surrounding whitespace stripped first. The name
    is tried as a person, then as a company; a name that matches neither
    (or that already has more than one row) is logged as unlinkable.

    :param name: The name to add.
    :type name: string
    """
    name = name.replace(".", "").strip()

    if self.is_this_a_person(name):
        # people with Jr ect at the end of the name are people
        people_found = SESSION.query(Person).filter(
            Person.name == name).count()

        if people_found == 0:
            SESSION.add(Person(name))
            SESSION.commit()
            return

        if people_found == 1:
            SESSION.close()
            return

    if self._is_this_a_company(name):
        companies_found = SESSION.query(Company).filter(
            Company.name == name).count()

        if companies_found == 0:
            SESSION.add(Company(name))
            SESSION.commit()
            return

        if companies_found == 1:
            SESSION.close()
            return

    log.info("Could not link %s", name)
    SESSION.close()
def link(self, name, vendor):
    """
    Link a vendor to a person or a company that shares the given name.

    Newlines at the ends and periods are removed from the name before the
    lookup. A link row is added only when the name resolves to exactly one
    kind of entity (person or company), the vendor exists, and no such link
    is stored yet.

    :param name: The person or company name.
    :type name: string
    :param vendor: The vendor name, matched exactly.
    :type vendor: string
    """
    name = name.strip("\n").replace(".", "").strip()

    vendorindb = SESSION.query(Vendor).filter(Vendor.name == vendor).first()
    personindb = SESSION.query(Person).filter(Person.name == name).first()
    companyindb = SESSION.query(Company).filter(Company.name == name).first()

    # Without a vendor row there is nothing to link, and reading
    # vendorindb.id below would raise AttributeError.
    if vendorindb is None:
        SESSION.close()
        return

    if personindb is not None and companyindb is None:
        existing_links = (
            SESSION.query(VendorOfficer)
            .filter(VendorOfficer.vendorid == vendorindb.id)
            .filter(VendorOfficer.personid == personindb.id)
            .count())

        if existing_links < 1:
            # logging interpolates with %-style placeholders.
            log.info("Linking %s to %s", vendorindb.id, personindb.id)
            SESSION.add(VendorOfficer(vendorindb.id, personindb.id))
            SESSION.commit()
            return

    if companyindb is not None and personindb is None:
        existing_links = (
            SESSION.query(VendorOfficerCompany)
            .filter(VendorOfficerCompany.vendorid == vendorindb.id)
            .filter(VendorOfficerCompany.companiesid == companyindb.id)
            .count())

        if existing_links < 1:
            # Logged rather than printed, to match the person branch.
            log.info("Linking %s to %s", vendorindb.id, companyindb.id)
            SESSION.add(VendorOfficerCompany(vendorindb.id, companyindb.id))
            SESSION.commit()
            return

    SESSION.close()
def add_name(self, name):
    """
    Add a name to the Person or Company table when it is missing.

    The name has periods removed and is stripped of surrounding whitespace.
    It is checked as a person first and then as a company. When neither
    check stores or finds exactly one row, a "could not link" message is
    logged.

    :param name: The name to add.
    :type name: string
    """
    name = name.replace(".", "").strip()

    if self.is_this_a_person(name):
        # people with Jr ect at the end of the name are people
        existing = SESSION.query(Person).filter(Person.name == name).count()

        if existing == 0:
            SESSION.add(Person(name))
            SESSION.commit()
            return
        elif existing == 1:
            SESSION.close()
            return

    if self._is_this_a_company(name):
        existing = SESSION.query(Company).filter(
            Company.name == name).count()

        if existing == 0:
            SESSION.add(Company(name))
            SESSION.commit()
            return
        elif existing == 1:
            SESSION.close()
            return

    log.info("Could not link %s", name)
    SESSION.close()
def link(self, name, vendor):
    """
    Link the vendor to the person or company with the given name.

    The name is normalized (end newlines and all periods removed, then
    stripped) before being looked up. A link is stored only when the vendor
    exists, the name matches a person or a company but not both, and the
    link is not already present.

    :param name: The person or company name.
    :type name: string
    :param vendor: The vendor name, matched exactly.
    :type vendor: string
    """
    name = name.strip("\n").replace(".", "").strip()

    # get the vendor:
    vendorindb = (SESSION.query(Vendor)
                  .filter(Vendor.name == vendor)
                  .first())

    # get the person:
    personindb = (SESSION.query(Person)
                  .filter(Person.name == name)
                  .first())

    # get the company:
    companyindb = (SESSION.query(Company)
                   .filter(Company.name == name)
                   .first())

    # Bail out before any vendorindb.id access: a missing vendor cannot be
    # linked and would otherwise raise AttributeError in the queries below.
    if vendorindb is None:
        SESSION.close()
        return

    if personindb is not None and companyindb is None:
        link_count = (SESSION.query(VendorOfficer)
                      .filter(VendorOfficer.vendorid == vendorindb.id)
                      .filter(VendorOfficer.personid == personindb.id)
                      .count())

        if link_count < 1:
            # %-style placeholders are what the logging module interpolates.
            log.info("Linking %s to %s", vendorindb.id, personindb.id)
            SESSION.add(VendorOfficer(vendorindb.id, personindb.id))
            SESSION.commit()
            return

    if companyindb is not None and personindb is None:
        link_count = (
            SESSION.query(VendorOfficerCompany)
            .filter(VendorOfficerCompany.vendorid == vendorindb.id)
            .filter(VendorOfficerCompany.companiesid == companyindb.id)
            .count())

        if link_count < 1:
            # Uses the logger instead of print, matching the person branch.
            log.info("Linking %s to %s", vendorindb.id, companyindb.id)
            SESSION.add(VendorOfficerCompany(vendorindb.id, companyindb.id))
            SESSION.commit()
            return

    SESSION.close()