def _strip_suffix(text, suffix):
    """Return ``text`` with one trailing ``suffix`` removed, if present.

    Unlike ``str.strip``/``str.rstrip``, which treat their argument as a
    set of characters, this only removes the exact suffix.
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def _best_s3_match(s3_files, filename, awardee):
    """Pick the S3 key whose awardee segment best matches ``awardee``.

    Keys are expected to be named ``costars-{number}-{awardee}.pdf``.

    Args:
        s3_files: Iterable of S3 key objects exposing ``name`` and
            ``generate_url``.
        filename: The COSTARS csv filename (e.g. ``COSTARS-3.csv``).
        awardee: Company name taken from the current row, or None.

    Returns:
        A ``(url, ratio)`` tuple for the best candidate found, or
        ``(None, 0)`` when nothing could be compared.
    """
    best = (None, 0)
    if awardee is None:
        # there is nothing to compare the key names against
        return best

    def is_junk(char):
        return bool(re.match(JUNK_STRING, char))

    # all files start with 'costars-{number}-', which we should be
    # able to get from our filename
    prefix = _strip_suffix(filename.lower(), '.csv')
    awardee = awardee.rstrip('.')

    for _file in s3_files:
        # NOTE: .encode() keeps the byte-string handling of the original
        # (Python 2) implementation
        key_name = _strip_suffix(
            _file.name.encode('utf-8'), '.pdf'
        ).rstrip('.')

        # because the file start patterns are consistent, strip out the
        # costars-{number}- part; maxsplit keeps dashes inside the
        # awardee name intact
        parts = key_name.split('-', 2)
        if len(parts) < 3:
            # key does not follow the expected naming pattern
            continue

        match_ratio = SM(is_junk, awardee, parts[2]).ratio()
        if match_ratio == 1:
            # this is an absolute match; stop as soon as it also belongs
            # to the COSTARS file that is being imported
            best = (
                _file.generate_url(expires_in=0, query_auth=False),
                match_ratio
            )
            if key_name.startswith(prefix):
                break
        elif match_ratio > best[1]:
            # this is the best match we have so far
            best = (
                _file.generate_url(expires_in=0, query_auth=False),
                match_ratio
            )

    return best


def main(filetarget, filename, access_key, access_secret, bucket):
    """Import the rows of a COSTARS csv file into the database.

    For every row a company, an optional company contact, a 'COSTARS'
    contract, its contract properties and its line items are selected or
    created. When the S3 bucket is reachable, the contract is linked to
    the stored file whose name best matches the awardee.

    Args:
        filetarget: Passed to ``extract`` to load the rows.
        filename: Name of the csv file; must match ``VALID_FILENAMES``.
        access_key: Passed to ``connect_to_s3_bucket``.
        access_secret: Passed to ``connect_to_s3_bucket``.
        bucket: Passed to ``connect_to_s3_bucket``.

    Raises:
        IOError: If ``filename`` does not match ``VALID_FILENAMES``.
        Exception: Any error raised while importing is re-raised after
            the session has been rolled back.
    """
    if not re.match(VALID_FILENAMES, filename):
        raise IOError(
            'Not a valid filename. Filenames must have COSTARS with number '
            'separated by a dash (Ex. "COSTARS-3.csv").'
        )

    data = extract(filetarget)
    s3_files = None

    # connect to s3 and get contents of bucket
    bucket = connect_to_s3_bucket(access_key, access_secret, bucket)
    if bucket:
        s3_files = bucket.list()

    try:
        for row in data:

            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                pass

            company, _ = get_or_create(
                db.session, Company,
                company_name=convert_empty_to_none(row.get('Company'))
            )

            company_contact = determine_company_contact(row)

            if company_contact:
                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **company_contact
                )

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            costars_awardee = convert_empty_to_none(row.get('Company'))

            try:
                expiration = datetime.datetime.strptime(
                    row.get('Expiration'), '%m/%d/%y'
                )
            except (TypeError, ValueError):
                # TypeError: row.get() returned None (column missing);
                # ValueError: the value is not a %m/%d/%y date
                expiration = None

            costars_type, _ = get_or_create(
                db.session, ContractType,
                name='COSTARS'
            )

            # create or select the contract object
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=costars_type,
                expiration_date=expiration,
                financial_id=convert_empty_to_none(row.get('CONTROLLER')),
                description='{costars} - {company}'.format(
                    costars=_strip_suffix(
                        filename.replace('-', ' '), '.csv'
                    ).upper(),
                    company=costars_awardee
                )
            )

            # connect to s3
            if s3_files:
                max_ratio = _best_s3_match(
                    s3_files, filename, costars_awardee
                )

                # use the best match that we have
                print('%s %s' % (contract.description, max_ratio))
                if max_ratio[1] > 0.7:
                    contract.contract_href = max_ratio[0]

            for k, v in row.items():
                if k in CONSTANT_FIELDS:
                    continue

                # insert a new contract property with where the company
                # is located
                elif k == 'County Located':
                    if row.get('County Located') == '':
                        continue

                    county_located, new_county_located = get_or_create(
                        db.session, ContractProperty,
                        contract_id=contract.id,
                        key='Located in',
                        value=convert_empty_to_none(
                            '{county} County'.format(
                                county=row.get('County Located')
                            )
                        )
                    )

                    if new_county_located:
                        db.session.add(county_located)

                # insert a new property with the listed manufacturers
                elif k == 'Manufacturers':
                    if convert_empty_to_none(row.get('Manufacturers')):
                        manufacturer, new_manufacturer = get_or_create(
                            db.session, ContractProperty,
                            contract_id=contract.id,
                            key='List of manufacturers',
                            value=convert_empty_to_none(
                                row.get('Manufacturers')
                            )
                        )

                        if new_manufacturer:
                            db.session.add(manufacturer)

                # we are treating everything else like a line item,
                # so upload all of those pieces
                else:
                    if not convert_to_bool(convert_empty_to_none(v)):
                        continue

                    line_item, new_line_item = get_or_create(
                        db.session, LineItem,
                        contract_id=contract.id,
                        description=convert_empty_to_none(k)
                    )

                    if new_line_item:
                        db.session.add(line_item)

            contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()
def _strip_suffix(text, suffix):
    """Return ``text`` with one trailing ``suffix`` removed, if present.

    Unlike ``str.strip``/``str.rstrip``, which treat their argument as a
    set of characters, this only removes the exact suffix.
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def _best_s3_match(s3_files, filename, awardee):
    """Pick the S3 key whose awardee segment best matches ``awardee``.

    Keys are expected to be named ``costars-{number}-{awardee}.pdf``.

    Args:
        s3_files: Iterable of S3 key objects exposing ``name`` and
            ``generate_url``.
        filename: The COSTARS csv filename (e.g. ``COSTARS-3.csv``).
        awardee: Company name taken from the current row, or None.

    Returns:
        A ``(url, ratio)`` tuple for the best candidate found, or
        ``(None, 0)`` when nothing could be compared.
    """
    best = (None, 0)
    if awardee is None:
        # there is nothing to compare the key names against
        return best

    def is_junk(char):
        return bool(re.match(JUNK_STRING, char))

    # all files start with 'costars-{number}-', which we should be
    # able to get from our filename
    prefix = _strip_suffix(filename.lower(), '.csv')
    awardee = awardee.rstrip('.')

    for _file in s3_files:
        # NOTE: .encode() keeps the byte-string handling of the original
        # (Python 2) implementation
        key_name = _strip_suffix(
            _file.name.encode('utf-8'), '.pdf'
        ).rstrip('.')

        # because the file start patterns are consistent, strip out the
        # costars-{number}- part; maxsplit keeps dashes inside the
        # awardee name intact
        parts = key_name.split('-', 2)
        if len(parts) < 3:
            # key does not follow the expected naming pattern
            continue

        match_ratio = SM(is_junk, awardee, parts[2]).ratio()
        if match_ratio == 1:
            # this is an absolute match; stop as soon as it also belongs
            # to the COSTARS file that is being imported
            best = (
                _file.generate_url(expires_in=0, query_auth=False),
                match_ratio
            )
            if key_name.startswith(prefix):
                break
        elif match_ratio > best[1]:
            # this is the best match we have so far
            best = (
                _file.generate_url(expires_in=0, query_auth=False),
                match_ratio
            )

    return best


def main(filetarget, filename, access_key, access_secret, bucket):
    """Import the rows of a COSTARS csv file into the database.

    For every row a company, an optional company contact, a 'COSTARS'
    contract, its contract properties and its line items are selected or
    created. When the S3 bucket is reachable, the contract is linked to
    the stored file whose name best matches the awardee.

    Args:
        filetarget: Passed to ``extract`` to load the rows.
        filename: Name of the csv file; must match ``VALID_FILENAMES``.
        access_key: Passed to ``connect_to_s3_bucket``.
        access_secret: Passed to ``connect_to_s3_bucket``.
        bucket: Passed to ``connect_to_s3_bucket``.

    Raises:
        IOError: If ``filename`` does not match ``VALID_FILENAMES``.
        Exception: Any error raised while importing is re-raised after
            the session has been rolled back.
    """
    if not re.match(VALID_FILENAMES, filename):
        raise IOError(
            'Not a valid filename. Filenames must have COSTARS with number '
            'separated by a dash (Ex. "COSTARS-3.csv").'
        )

    data = extract(filetarget)
    s3_files = None

    # connect to s3 and get contents of bucket
    bucket = connect_to_s3_bucket(access_key, access_secret, bucket)
    if bucket:
        s3_files = bucket.list()

    try:
        for row in data:

            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                pass

            company, _ = get_or_create(
                db.session, Company,
                company_name=convert_empty_to_none(row.get('Company')))

            company_contact = determine_company_contact(row)

            if company_contact:
                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **company_contact)

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            costars_awardee = convert_empty_to_none(row.get('Company'))

            try:
                expiration = datetime.datetime.strptime(
                    row.get('Expiration'), '%m/%d/%y')
            except (TypeError, ValueError):
                # TypeError: row.get() returned None (column missing);
                # ValueError: the value is not a %m/%d/%y date
                expiration = None

            costars_type, _ = get_or_create(
                db.session, ContractType, name='COSTARS')

            # create or select the contract object
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=costars_type,
                expiration_date=expiration,
                financial_id=convert_empty_to_none(row.get('CONTROLLER')),
                description='{costars} - {company}'.format(
                    costars=_strip_suffix(
                        filename.replace('-', ' '), '.csv').upper(),
                    company=costars_awardee))

            # connect to s3
            if s3_files:
                max_ratio = _best_s3_match(
                    s3_files, filename, costars_awardee)

                # use the best match that we have
                print('%s %s' % (contract.description, max_ratio))
                if max_ratio[1] > 0.7:
                    contract.contract_href = max_ratio[0]

            for k, v in row.items():
                if k in CONSTANT_FIELDS:
                    continue

                # insert a new contract property with where the company
                # is located
                elif k == 'County Located':
                    if row.get('County Located') == '':
                        continue

                    county_located, new_county_located = get_or_create(
                        db.session, ContractProperty,
                        contract_id=contract.id,
                        key='Located in',
                        value=convert_empty_to_none(
                            '{county} County'.format(
                                county=row.get('County Located'))))

                    if new_county_located:
                        db.session.add(county_located)

                # insert a new property with the listed manufacturers
                elif k == 'Manufacturers':
                    if convert_empty_to_none(row.get('Manufacturers')):
                        manufacturer, new_manufacturer = get_or_create(
                            db.session, ContractProperty,
                            contract_id=contract.id,
                            key='List of manufacturers',
                            value=convert_empty_to_none(
                                row.get('Manufacturers')))

                        if new_manufacturer:
                            db.session.add(manufacturer)

                # we are treating everything else like a line item,
                # so upload all of those pieces
                else:
                    if not convert_to_bool(convert_empty_to_none(v)):
                        continue

                    line_item, new_line_item = get_or_create(
                        db.session, LineItem,
                        contract_id=contract.id,
                        description=convert_empty_to_none(k))

                    if new_line_item:
                        db.session.add(line_item)

            contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()
def main(file_target='./files/2015-10-27-state-contracts.csv'):
    """Import state contracts from a csv file into the database.

    For every row a company, an optional company contact, a contract
    type, a contract and its 'Parent Number' / 'Contract Number'
    properties are selected or created, and the company is attached to
    the contract.

    Args:
        file_target: Passed to ``extract`` to load the rows.

    Raises:
        Exception: Any error raised while importing is re-raised after
            the session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:

            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                pass

            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session, Company,
                    company_name=convert_empty_to_none(row.get('COMPANY'))
                )
            except IntegrityError:
                db.session.rollback()
                company = None

            company_contact = determine_company_contact(row)

            if company_contact and company:
                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **company_contact
                )

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(
                    row.get('EXPIRATION'), '%m/%d/%Y'
                )
            except (TypeError, ValueError):
                # TypeError: row.get() returned None (column missing);
                # ValueError: the value is not a %m/%d/%Y date
                expiration = None

            try:
                _financial_id = convert_empty_to_none(row.get('CONTROLLER'))
            except ValueError:
                _financial_id = None

            contract_type, _ = get_or_create(
                db.session, ContractType,
                name=convert_empty_to_none(row.get('TYPE OF CONTRACT'))
            )

            # an empty SERVICE comes back as None, which has no .upper();
            # treat it as "not an IT services ITQ contract"
            service = convert_empty_to_none(row.get('SERVICE'))
            is_it_services_itq = 'IT SERVICES ITQ' in (service or '').upper()

            # create or select the contract object
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=contract_type,
                expiration_date=expiration,
                financial_id=_financial_id,
                description=service,
                contract_href=BASE_CONTRACT_URL.format(
                    number=convert_empty_to_none(row.get('CONTRACT')),
                    type='Overview' if is_it_services_itq else 'ContractFile'
                )
            )

            parent_number, new_parent_number = get_or_create(
                db.session, ContractProperty, commit=False,
                contract_id=contract.id,
                key='Parent Number',
                value=convert_empty_to_none(row.get('PARENT'))
            )

            if new_parent_number:
                db.session.add(parent_number)

            contract_number, new_contract_number = get_or_create(
                db.session, ContractProperty, commit=False,
                contract_id=contract.id,
                key='Contract Number',
                value=convert_empty_to_none(row.get('CONTRACT'))
            )

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()
def main(file_target='./files/2015-05-05-contractlist.csv'):
    """Import the county contract list from a csv file into the database.

    For every row a company, an optional company contact, a contract
    type, a contract and its 'Spec Number' property are selected or
    created, and the company is attached to the contract. Contracts of
    type 'County' additionally get a link built from ``BASE_CONTRACT_URL``.

    Args:
        file_target: Passed to ``extract`` to load the rows.

    Raises:
        Exception: Any error raised while importing is re-raised after
            the session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:
            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session, Company,
                    company_name=convert_empty_to_none(row.get('COMPANY'))
                )
            except IntegrityError:
                db.session.rollback()
                company = None

            company_contact = determine_company_contact(row)

            if company_contact and company:
                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **company_contact
                )

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(
                    row.get('EXPIRATION'), '%m/%d/%y'
                )
            except (TypeError, ValueError):
                # TypeError: row.get() returned None (column missing);
                # ValueError: the value is not a %m/%d/%y date
                expiration = None

            try:
                _financial_id = convert_empty_to_none(row.get('CONTROLLER'))
            except ValueError:
                _financial_id = None

            contract_type, _ = get_or_create(
                db.session, ContractType,
                name=convert_empty_to_none(row.get('TYPE OF CONTRACT'))
            )

            # create or select the contract object
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=contract_type,
                expiration_date=expiration,
                financial_id=_financial_id,
                description=convert_empty_to_none(row.get('SERVICE'))
            )

            # The contract is created with a ContractType record, so
            # compare by the record's name; a plain value (no ``name``
            # attribute) is compared as-is.
            # NOTE(review): assumes ContractType does not itself define
            # equality with strings - confirm against the model.
            type_name = getattr(
                contract.contract_type, 'name', contract.contract_type
            )
            if type_name == 'County':
                contract.contract_href = BASE_CONTRACT_URL.format(
                    number=convert_contract_number(
                        convert_empty_to_none(row.get('CONTRACT'))
                    )
                )

            contract_number, new_contract_number = get_or_create(
                db.session, ContractProperty, commit=False,
                contract_id=contract.id,
                key='Spec Number',
                value=convert_empty_to_none(row.get('CONTRACT'))
            )

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise
def main(file_target='./files/2015-05-05-contractlist.csv'):
    """Import the county contract list from a csv file into the database.

    For every row a company, an optional company contact, a contract and
    its 'Spec Number' property are selected or created, and the company
    is attached to the contract. Contracts whose type is 'County'
    additionally get a link built from ``BASE_CONTRACT_URL``.

    Args:
        file_target: Passed to ``extract`` to load the rows.

    Raises:
        Exception: Any error raised while importing is re-raised after
            the session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:
            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session, Company,
                    company_name=convert_empty_to_none(row.get('COMPANY')))
            except IntegrityError:
                db.session.rollback()
                company = None

            company_contact = determine_company_contact(row)

            if company_contact and company:
                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **company_contact)

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(
                    row.get('EXPIRATION'), '%m/%d/%y')
            except (TypeError, ValueError):
                # TypeError: row.get() returned None (column missing);
                # ValueError: the value is not a %m/%d/%y date
                expiration = None

            try:
                _financial_id = convert_empty_to_none(
                    int(row.get('CONTROLLER')))
            except (TypeError, ValueError):
                # int(None) raises TypeError, int('') raises ValueError
                _financial_id = None

            # create or select the contract object
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=convert_empty_to_none(
                    row.get('TYPE OF CONTRACT')),
                expiration_date=expiration,
                financial_id=_financial_id,
                description=convert_empty_to_none(row.get('SERVICE')))

            if contract.contract_type == 'County':
                contract.contract_href = BASE_CONTRACT_URL.format(
                    number=convert_contract_number(
                        convert_empty_to_none(row.get('CONTRACT'))))

            contract_number, new_contract_number = get_or_create(
                db.session, ContractProperty, commit=False,
                contract_id=contract.id,
                key='Spec Number',
                value=convert_empty_to_none(row.get('CONTRACT')))

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise
def main(file_target='./files/2015-10-27-state-contracts.csv'):
    """Import state contracts from a csv file into the database.

    For every row a company, an optional company contact, a contract
    type, a contract and its 'Parent Number' / 'Contract Number'
    properties are selected or created, and the company is attached to
    the contract.

    Args:
        file_target: Passed to ``extract`` to load the rows.

    Raises:
        Exception: Any error raised while importing is re-raised after
            the session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:

            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                pass

            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session, Company,
                    company_name=convert_empty_to_none(row.get('COMPANY')))
            except IntegrityError:
                db.session.rollback()
                company = None

            company_contact = determine_company_contact(row)

            if company_contact and company:
                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **company_contact)

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(
                    row.get('EXPIRATION'), '%m/%d/%Y')
            except (TypeError, ValueError):
                # TypeError: row.get() returned None (column missing);
                # ValueError: the value is not a %m/%d/%Y date
                expiration = None

            try:
                _financial_id = convert_empty_to_none(row.get('CONTROLLER'))
            except ValueError:
                _financial_id = None

            contract_type, _ = get_or_create(
                db.session, ContractType,
                name=convert_empty_to_none(row.get('TYPE OF CONTRACT')))

            # an empty SERVICE comes back as None, which has no .upper();
            # treat it as "not an IT services ITQ contract"
            service = convert_empty_to_none(row.get('SERVICE'))
            is_it_services_itq = 'IT SERVICES ITQ' in (service or '').upper()

            # create or select the contract object
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=contract_type,
                expiration_date=expiration,
                financial_id=_financial_id,
                description=service,
                contract_href=BASE_CONTRACT_URL.format(
                    number=convert_empty_to_none(row.get('CONTRACT')),
                    type='Overview' if is_it_services_itq else 'ContractFile'))

            parent_number, new_parent_number = get_or_create(
                db.session, ContractProperty,
                contract_id=contract.id,
                key='Parent Number',
                value=convert_empty_to_none(row.get('PARENT')))

            if new_parent_number:
                db.session.add(parent_number)

            contract_number, new_contract_number = get_or_create(
                db.session, ContractProperty,
                contract_id=contract.id,
                key='Contract Number',
                value=convert_empty_to_none(row.get('CONTRACT')))

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()