コード例 #1
0
def main(filetarget, filename, access_key, access_secret, bucket):
    if not re.match(VALID_FILENAMES, filename):
        raise IOError('Not a valid filename. Filenames must have COSTARS with number separated by a dash (Ex. "COSTARS-3.csv").')

    data = extract(filetarget)
    s3_files = None

    # connect to s3 and get contents of bucket
    bucket = connect_to_s3_bucket(access_key, access_secret, bucket)
    if bucket:
        s3_files = bucket.list()

    try:
        for row in data:

            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                pass

            company, new_company = get_or_create(
                db.session, Company,
                company_name=convert_empty_to_none(row.get('Company'))
            )

            company_contact = determine_company_contact(row)

            if company_contact:

                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **company_contact
                )

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            costars_awardee = convert_empty_to_none(row.get('Company'))

            try:
                expiration = datetime.datetime.strptime(row.get('Expiration'), '%m/%d/%y')
            except ValueError:
                expiration = None

            costars_type, _ = get_or_create(
                db.session, ContractType,
                name='COSTARS'
            )

            # create or select the contract object
            contract, new_contract = get_or_create(
                db.session, ContractBase,
                contract_type=costars_type,
                expiration_date=expiration,
                financial_id=convert_empty_to_none(row.get('CONTROLLER')),
                description='{costars} - {company}'.format(
                    costars=filename.replace('-', ' ').rstrip('.csv').upper(),
                    company=costars_awardee
                )
            )

            # connect to s3
            if s3_files:
                # all files start with 'costars-{number}-', which we should be
                # able to get from our filename
                max_ratio = (None, 0)

                startswith = filename.strip('.csv').lower()
                for _file in s3_files:
                    _filename = _file.name.encode('utf-8').strip('.pdf').rstrip('.')
                    costars_awardee = costars_awardee.rstrip('.')

                    # because the file start patterns are consistent, strip
                    # out the costars-{number}-
                    _file_awardee = _filename.split('-')[2]

                    # check for absolute matches
                    match_ratio = SM(lambda x: bool(re.match(JUNK_STRING, x)), costars_awardee, _file_awardee).ratio()
                    if match_ratio == 1:
                        # this is an absolute match, insert it into the db and break
                        max_ratio = (_file.generate_url(expires_in=0, query_auth=False), match_ratio)
                        if _filename.startswith(startswith):
                            break
                        else:
                            continue

                    elif match_ratio > max_ratio[1]:
                        # this is the best match we have so far
                        max_ratio = (_file.generate_url(expires_in=0, query_auth=False), match_ratio)
                        continue

                # use the best match that we have
                print contract.description, max_ratio
                if max_ratio[1] > 0.7:
                    contract.contract_href = max_ratio[0]

            for k, v in row.iteritems():
                if k in CONSTANT_FIELDS:
                    continue

                # insert a new contract property with where the company is located
                elif k == 'County Located':
                    if row.get('County Located') != '':
                        county_located, new_county_located = get_or_create(
                            db.session, ContractProperty,
                            contract_id=contract.id,
                            key='Located in',
                            value=convert_empty_to_none(
                                '{county} County'.format(county=row.get('County Located'))
                            )
                        )
                    else:
                        continue

                    if new_county_located:
                        db.session.add(county_located)

                # insert a new property with the listed manufacturers
                elif k == 'Manufacturers':

                    if convert_empty_to_none(row.get('Manufacturers')):

                        manufacturer, new_manufacturer = get_or_create(
                            db.session, ContractProperty,
                            contract_id=contract.id,
                            key='List of manufacturers',
                            value=convert_empty_to_none(row.get('Manufacturers'))
                        )

                        if new_manufacturer:
                            db.session.add(manufacturer)

                # we are treating everything else like a line item,
                # so upload all of those pieces
                else:
                    if convert_to_bool(convert_empty_to_none(v)):
                        line_item, new_line_item = get_or_create(
                            db.session, LineItem,
                            contract_id=contract.id,
                            description=convert_empty_to_none(k)
                        )
                    else:
                        continue

                    if new_line_item:
                        db.session.add(line_item)

            contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()
コード例 #2
0
def main(filetarget, filename, access_key, access_secret, bucket):
    if not re.match(VALID_FILENAMES, filename):
        raise IOError(
            'Not a valid filename. Filenames must have COSTARS with number separated by a dash (Ex. "COSTARS-3.csv").'
        )

    data = extract(filetarget)
    s3_files = None

    # connect to s3 and get contents of bucket
    bucket = connect_to_s3_bucket(access_key, access_secret, bucket)
    if bucket:
        s3_files = bucket.list()

    try:
        for row in data:

            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                pass

            company, new_company = get_or_create(
                db.session,
                Company,
                company_name=convert_empty_to_none(row.get('Company')))

            company_contact = determine_company_contact(row)

            if company_contact:

                # create the new company contact
                company_contact, new_contact = get_or_create(
                    db.session,
                    CompanyContact,
                    company_id=company.id,
                    **company_contact)

                if new_contact:
                    db.session.add(company_contact)
                    db.session.commit()

            costars_awardee = convert_empty_to_none(row.get('Company'))

            try:
                expiration = datetime.datetime.strptime(
                    row.get('Expiration'), '%m/%d/%y')
            except ValueError:
                expiration = None

            costars_type, _ = get_or_create(db.session,
                                            ContractType,
                                            name='COSTARS')

            # create or select the contract object
            contract, new_contract = get_or_create(
                db.session,
                ContractBase,
                contract_type=costars_type,
                expiration_date=expiration,
                financial_id=convert_empty_to_none(row.get('CONTROLLER')),
                description='{costars} - {company}'.format(
                    costars=filename.replace('-', ' ').rstrip('.csv').upper(),
                    company=costars_awardee))

            # connect to s3
            if s3_files:
                # all files start with 'costars-{number}-', which we should be
                # able to get from our filename
                max_ratio = (None, 0)

                startswith = filename.strip('.csv').lower()
                for _file in s3_files:
                    _filename = _file.name.encode('utf-8').strip(
                        '.pdf').rstrip('.')
                    costars_awardee = costars_awardee.rstrip('.')

                    # because the file start patterns are consistent, strip
                    # out the costars-{number}-
                    _file_awardee = _filename.split('-')[2]

                    # check for absolute matches
                    match_ratio = SM(lambda x: bool(re.match(JUNK_STRING, x)),
                                     costars_awardee, _file_awardee).ratio()
                    if match_ratio == 1:
                        # this is an absolute match, insert it into the db and break
                        max_ratio = (_file.generate_url(expires_in=0,
                                                        query_auth=False),
                                     match_ratio)
                        if _filename.startswith(startswith):
                            break
                        else:
                            continue

                    elif match_ratio > max_ratio[1]:
                        # this is the best match we have so far
                        max_ratio = (_file.generate_url(expires_in=0,
                                                        query_auth=False),
                                     match_ratio)
                        continue

                # use the best match that we have
                print contract.description, max_ratio
                if max_ratio[1] > 0.7:
                    contract.contract_href = max_ratio[0]

            for k, v in row.iteritems():
                if k in CONSTANT_FIELDS:
                    continue

                # insert a new contract property with where the company is located
                elif k == 'County Located':
                    if row.get('County Located') != '':
                        county_located, new_county_located = get_or_create(
                            db.session,
                            ContractProperty,
                            contract_id=contract.id,
                            key='Located in',
                            value=convert_empty_to_none(
                                '{county} County'.format(
                                    county=row.get('County Located'))))
                    else:
                        continue

                    if new_county_located:
                        db.session.add(county_located)

                # insert a new property with the listed manufacturers
                elif k == 'Manufacturers':

                    if convert_empty_to_none(row.get('Manufacturers')):

                        manufacturer, new_manufacturer = get_or_create(
                            db.session,
                            ContractProperty,
                            contract_id=contract.id,
                            key='List of manufacturers',
                            value=convert_empty_to_none(
                                row.get('Manufacturers')))

                        if new_manufacturer:
                            db.session.add(manufacturer)

                # we are treating everything else like a line item,
                # so upload all of those pieces
                else:
                    if convert_to_bool(convert_empty_to_none(v)):
                        line_item, new_line_item = get_or_create(
                            db.session,
                            LineItem,
                            contract_id=contract.id,
                            description=convert_empty_to_none(k))
                    else:
                        continue

                    if new_line_item:
                        db.session.add(line_item)

            contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()
コード例 #3
0
def main(file_target='./files/2015-10-27-state-contracts.csv'):
    """Import the state contracts spreadsheet into the database.

    For each row from ``extract(file_target)`` this gets or creates the
    company (skipped on ``IntegrityError``), its contact, the contract type,
    the contract itself, and 'Parent Number' / 'Contract Number' contract
    properties, then links the company to the contract and commits.

    Arguments:
        file_target: handed to ``extract`` to obtain the rows.

    Raises:
        Exception: any error raised while importing is re-raised after the
            session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:
            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                # NOTE(review): presumably the listeners were already removed
                # while handling an earlier row -- confirm.
                pass

            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session, Company,
                    company_name=convert_empty_to_none(row.get('COMPANY'))
                )
            except IntegrityError:
                db.session.rollback()
                company = None

            contact_fields = determine_company_contact(row)

            if contact_fields and company:

                # create the new company contact
                contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **contact_fields
                )

                if new_contact:
                    db.session.add(contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(row.get('EXPIRATION'), '%m/%d/%Y')
            except (TypeError, ValueError):
                # ValueError: unparseable date; TypeError: the column is
                # missing, so row.get() returned None.
                expiration = None

            try:
                _financial_id = convert_empty_to_none(row.get('CONTROLLER'))
            except ValueError:
                _financial_id = None

            contract_type, _ = get_or_create(
                db.session, ContractType,
                name=convert_empty_to_none(row.get('TYPE OF CONTRACT'))
            )

            service = convert_empty_to_none(row.get('SERVICE'))
            contract_number_value = convert_empty_to_none(row.get('CONTRACT'))

            # An empty SERVICE cell is converted to None, which has no
            # .upper(); treat it as a regular (non-ITQ) contract instead of
            # aborting the whole import.
            if service is not None and 'IT SERVICES ITQ' in service.upper():
                href_type = 'Overview'
            else:
                href_type = 'ContractFile'

            # create or select the contract object
            # NOTE(review): a missing CONTRACT value is still formatted into
            # the URL as-is -- confirm whether that is intended.
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=contract_type,
                expiration_date=expiration,
                financial_id=_financial_id,
                description=service,
                contract_href=BASE_CONTRACT_URL.format(
                    number=contract_number_value,
                    type=href_type
                )
            )

            parent_number, new_parent_number = get_or_create(
                db.session, ContractProperty, commit=False,
                contract_id=contract.id,
                key='Parent Number',
                value=convert_empty_to_none(row.get('PARENT'))
            )

            if new_parent_number:
                db.session.add(parent_number)

            contract_number, new_contract_number = get_or_create(
                db.session, ContractProperty, commit=False,
                contract_id=contract.id,
                key='Contract Number',
                value=contract_number_value
            )

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()
コード例 #4
0
def main(file_target='./files/2015-05-05-contractlist.csv'):
    """Import the contract list spreadsheet into the database.

    For each row from ``extract(file_target)`` this gets or creates the
    company (skipped on ``IntegrityError``), its contact, the contract type
    and the contract, sets ``contract_href`` for 'County' contracts, stores
    the 'Spec Number' contract property, links the company and commits.

    Arguments:
        file_target: handed to ``extract`` to obtain the rows.

    Raises:
        Exception: any error raised while importing is re-raised after the
            session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:
            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session, Company,
                    company_name=convert_empty_to_none(row.get('COMPANY'))
                )
            except IntegrityError:
                db.session.rollback()
                company = None

            contact_fields = determine_company_contact(row)

            if contact_fields and company:

                # create the new company contact
                contact, new_contact = get_or_create(
                    db.session, CompanyContact,
                    company_id=company.id,
                    **contact_fields
                )

                if new_contact:
                    db.session.add(contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(row.get('EXPIRATION'), '%m/%d/%y')
            except (TypeError, ValueError):
                # ValueError: unparseable date; TypeError: the column is
                # missing, so row.get() returned None.
                expiration = None

            try:
                _financial_id = convert_empty_to_none(row.get('CONTROLLER'))
            except ValueError:
                _financial_id = None

            contract_type, _ = get_or_create(
                db.session, ContractType,
                name=convert_empty_to_none(row.get('TYPE OF CONTRACT'))
            )

            contract_number_value = convert_empty_to_none(row.get('CONTRACT'))

            # create or select the contract object
            contract, _ = get_or_create(
                db.session, ContractBase,
                contract_type=contract_type,
                expiration_date=expiration,
                financial_id=_financial_id,
                description=convert_empty_to_none(row.get('SERVICE'))
            )

            # contract_type is the object obtained from
            # get_or_create(ContractType, name=...), so comparing it directly
            # with the string 'County' does not match; compare its name
            # instead. The getattr fallback keeps plain-string contract types
            # working.
            # NOTE(review): assumes ContractType exposes the ``name`` it was
            # created with -- confirm against the model.
            type_name = getattr(contract.contract_type, 'name', contract.contract_type)
            if type_name == 'County':
                contract.contract_href = BASE_CONTRACT_URL.format(
                    number=convert_contract_number(contract_number_value)
                )

            contract_number, new_contract_number = get_or_create(
                db.session, ContractProperty, commit=False,
                contract_id=contract.id,
                key='Spec Number',
                value=contract_number_value
            )

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise
コード例 #5
0
def main(file_target='./files/2015-05-05-contractlist.csv'):
    """Import the contract list spreadsheet into the database.

    For each row from ``extract(file_target)`` this gets or creates the
    company (skipped on ``IntegrityError``), its contact and the contract,
    sets ``contract_href`` for contracts whose type is 'County', stores the
    'Spec Number' contract property, links the company and commits.

    Arguments:
        file_target: handed to ``extract`` to obtain the rows.

    Raises:
        Exception: any error raised while importing is re-raised after the
            session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:
            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session,
                    Company,
                    company_name=convert_empty_to_none(row.get('COMPANY')))
            except IntegrityError:
                db.session.rollback()
                company = None

            contact_fields = determine_company_contact(row)

            if contact_fields and company:

                # create the new company contact
                contact, new_contact = get_or_create(
                    db.session,
                    CompanyContact,
                    company_id=company.id,
                    **contact_fields)

                if new_contact:
                    db.session.add(contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(
                    row.get('EXPIRATION'), '%m/%d/%y')
            except (TypeError, ValueError):
                # ValueError: unparseable date; TypeError: the column is
                # missing, so row.get() returned None.
                expiration = None

            try:
                _financial_id = convert_empty_to_none(
                    int(row.get('CONTROLLER')))
            except (TypeError, ValueError):
                # int() raises ValueError for blank/non-numeric text and
                # TypeError when the column is missing (None).
                _financial_id = None

            contract_number_value = convert_empty_to_none(row.get('CONTRACT'))

            # create or select the contract object
            contract, _ = get_or_create(
                db.session,
                ContractBase,
                contract_type=convert_empty_to_none(
                    row.get('TYPE OF CONTRACT')),
                expiration_date=expiration,
                financial_id=_financial_id,
                description=convert_empty_to_none(row.get('SERVICE')))

            if contract.contract_type == 'County':
                contract.contract_href = BASE_CONTRACT_URL.format(
                    number=convert_contract_number(contract_number_value))

            contract_number, new_contract_number = get_or_create(
                db.session,
                ContractProperty,
                commit=False,
                contract_id=contract.id,
                key='Spec Number',
                value=contract_number_value)

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise
コード例 #6
0
def main(file_target='./files/2015-10-27-state-contracts.csv'):
    """Import the state contracts spreadsheet into the database.

    For each row from ``extract(file_target)`` this gets or creates the
    company (skipped on ``IntegrityError``), its contact, the contract type,
    the contract itself, and 'Parent Number' / 'Contract Number' contract
    properties, then links the company to the contract and commits.

    Arguments:
        file_target: handed to ``extract`` to obtain the rows.

    Raises:
        Exception: any error raised while importing is re-raised after the
            session has been rolled back.
    """
    data = extract(file_target)

    try:
        for row in data:
            try:
                turn_off_sqlalchemy_events()
            except InvalidRequestError:
                # NOTE(review): presumably the listeners were already removed
                # while handling an earlier row -- confirm.
                pass

            # create or select the company
            try:
                company, _ = get_or_create(
                    db.session,
                    Company,
                    company_name=convert_empty_to_none(row.get('COMPANY')))
            except IntegrityError:
                db.session.rollback()
                company = None

            contact_fields = determine_company_contact(row)

            if contact_fields and company:

                # create the new company contact
                contact, new_contact = get_or_create(
                    db.session,
                    CompanyContact,
                    company_id=company.id,
                    **contact_fields)

                if new_contact:
                    db.session.add(contact)
                    db.session.commit()

            try:
                expiration = datetime.datetime.strptime(
                    row.get('EXPIRATION'), '%m/%d/%Y')
            except (TypeError, ValueError):
                # ValueError: unparseable date; TypeError: the column is
                # missing, so row.get() returned None.
                expiration = None

            try:
                _financial_id = convert_empty_to_none(row.get('CONTROLLER'))
            except ValueError:
                _financial_id = None

            contract_type, _ = get_or_create(db.session,
                                             ContractType,
                                             name=convert_empty_to_none(
                                                 row.get('TYPE OF CONTRACT')))

            service = convert_empty_to_none(row.get('SERVICE'))
            contract_number_value = convert_empty_to_none(row.get('CONTRACT'))

            # An empty SERVICE cell is converted to None, which has no
            # .upper(); treat it as a regular (non-ITQ) contract instead of
            # aborting the whole import.
            if service is not None and 'IT SERVICES ITQ' in service.upper():
                href_type = 'Overview'
            else:
                href_type = 'ContractFile'

            # create or select the contract object
            contract, _ = get_or_create(
                db.session,
                ContractBase,
                contract_type=contract_type,
                expiration_date=expiration,
                financial_id=_financial_id,
                description=service,
                contract_href=BASE_CONTRACT_URL.format(
                    number=contract_number_value,
                    type=href_type))

            parent_number, new_parent_number = get_or_create(
                db.session,
                ContractProperty,
                contract_id=contract.id,
                key='Parent Number',
                value=convert_empty_to_none(row.get('PARENT')))

            if new_parent_number:
                db.session.add(parent_number)

            contract_number, new_contract_number = get_or_create(
                db.session,
                ContractProperty,
                contract_id=contract.id,
                key='Contract Number',
                value=contract_number_value)

            if new_contract_number:
                db.session.add(contract_number)

            if company:
                contract.companies.append(company)
            db.session.commit()

    except Exception:
        db.session.rollback()
        raise

    finally:
        turn_on_sqlalchemy_events()