def main(stage, framework_slug, api_token, user, supplier_ids=None):
    """Reset the "agreement returned" flag and delete matching agreement files.

    :param stage: stage name; used for both the API endpoint lookup and the
        agreements bucket name
    :param framework_slug: framework whose returned agreements are reset
    :param api_token: auth token handed to the data API client
    :param user: passed through to ``unset_framework_agreement_returned``
    :param supplier_ids: optional comma-separated string of supplier IDs; when
        omitted, every supplier that has returned an agreement is processed
    :raises Exception: if any requested supplier ID has not returned an
        agreement for the framework
    """
    bucket = S3('digitalmarketplace-agreements-{0}-{0}'.format(stage))
    data_client = DataAPIClient(
        get_api_endpoint_from_stage(stage, 'api'),
        api_token
    )

    if supplier_ids is not None:
        supplier_ids = [int(raw_id.strip()) for raw_id in supplier_ids.split(',')]

    returned = data_client.find_framework_suppliers(
        framework_slug, agreement_returned=True
    )['supplierFrameworks']
    returned_ids = set(entry['supplierId'] for entry in returned)

    if supplier_ids is None:
        # No explicit selection: act on everybody who returned an agreement.
        supplier_ids = returned_ids
    else:
        unknown_ids = set(supplier_ids) - returned_ids
        if unknown_ids:
            raise Exception("Invalid supplier IDs: {}".format(', '.join(map(str, unknown_ids))))

    for supplier_id in supplier_ids:
        logger.info("Resetting agreement returned flag for supplier {supplier_id}",
                    extra={'supplier_id': supplier_id})
        data_client.unset_framework_agreement_returned(supplier_id, framework_slug, user)

    # Walk the framework's agreements folder and remove the files that belong
    # to the selected suppliers.
    for document in bucket.list('{}/agreements/'.format(framework_slug)):
        if not match_signed_agreements(supplier_ids, document['path']):
            continue
        logger.info("Deleting {path}", extra={'path': document['path']})
        bucket.delete_key(document['path'])
if __name__ == '__main__': arguments = docopt(__doc__) data_api_url = get_api_endpoint_from_stage(arguments['<stage>'], 'api') client = DataAPIClient(data_api_url, get_auth_token('api', arguments['<stage>'])) FRAMEWORKS = ['g-cloud-7', 'g-cloud-8', 'digital-outcomes-and-specialists'] BUCKET_NAME = get_bucket_name(arguments['<stage>']) BUCKET = s3.S3(BUCKET_NAME) print("STARTED AT {}".format(time.strftime('%X %x %Z'))) for framework_slug in FRAMEWORKS: # Get all supplier frameworks who have returned their agreement supplier_frameworks = client.find_framework_suppliers( framework_slug=framework_slug, agreement_returned=True)['supplierFrameworks'] for supplier_framework in supplier_frameworks: print("======================") print("Supplier ID: {}, Agreement ID: {}".format( supplier_framework['supplierId'], supplier_framework['agreementId'])) # Get their framework agreement framework_agreement = client.get_framework_agreement( supplier_framework['agreementId'])['agreement'] # Skip if they already have a path if framework_agreement.get('signedAgreementPath'): print("PATH ALREADY EXISTS: {}".format(
return 'digitalmarketplace-agreements-{0}-{0}'.format(stage) if __name__ == '__main__': arguments = docopt(__doc__) data_api_url = get_api_endpoint_from_stage(arguments['<stage>'], 'api') client = DataAPIClient(data_api_url, arguments['<api_token>']) FRAMEWORKS = ['g-cloud-7', 'g-cloud-8', 'digital-outcomes-and-specialists'] BUCKET_NAME = get_bucket_name(arguments['<stage>']) BUCKET = s3.S3(BUCKET_NAME) print("STARTED AT {}".format(time.strftime('%X %x %Z'))) for framework_slug in FRAMEWORKS: # Get all supplier frameworks who have returned their agreement supplier_frameworks = client.find_framework_suppliers( framework_slug=framework_slug, agreement_returned=True)['supplierFrameworks'] for supplier_framework in supplier_frameworks: print("======================") print "Supplier ID: {}, Agreement ID: {}".format( supplier_framework['supplierId'], supplier_framework['agreementId']) # Get their framework agreement framework_agreement = client.get_framework_agreement(supplier_framework['agreementId'])['agreement'] # Skip if they already have a path if framework_agreement.get('signedAgreementPath'): print "PATH ALREADY EXISTS: {}".format(framework_agreement['signedAgreementPath']) continue # Find file path from s3
args.new_slug) # 0) Store the current framework state so that it can be restored at the end. current_framework_state = data_api_client.get_framework( args.new_slug)['frameworks']['status'] # 1) Set a given (new) framework to open data_api_client._post_with_updated_by( url='{}/frameworks/{}'.format(data_api_url, args.new_slug), data={"frameworks": { "status": "open" }}, user=identity) # 2) Find some suppliers with services on a given (old) framework (e.g. g-cloud-7) suppliers = data_api_client.find_framework_suppliers( framework_slug=args.old_slug)['supplierFrameworks'] suppliers = [ x for x in suppliers if x['supplierId'] not in SUPPLIER_BLACKLIST ] suppliers = random.sample(suppliers, args.supplier_count) suppliers_prepared = set() # 3) Submit draft services through randomly selected suppliers. while services_generated < args.quantity: random_lot = random.choice(args.lots) random_supplier = random.choice(suppliers) supplier_id = random_supplier['supplierId'] if supplier_id not in suppliers_prepared: # 2a) For each supplier, register interest in a given (new) framework data_api_client.register_framework_interest(
if __name__ == '__main__': arguments = docopt(__doc__) data_api_url = get_api_endpoint_from_stage(arguments['<stage>'], 'api') client = DataAPIClient(data_api_url, get_auth_token('api', arguments['<stage>'])) FRAMEWORKS = ['g-cloud-7', 'g-cloud-8', 'digital-outcomes-and-specialists'] BUCKET_NAME = get_bucket_name(arguments['<stage>']) BUCKET = s3.S3(BUCKET_NAME) print("STARTED AT {}".format(time.strftime('%X %x %Z'))) for framework_slug in FRAMEWORKS: # Get all supplier frameworks who have returned their agreement supplier_frameworks = client.find_framework_suppliers( framework_slug=framework_slug, agreement_returned=True, with_declarations=None)['supplierFrameworks'] for supplier_framework in supplier_frameworks: print("======================") print("Supplier ID: {}, Agreement ID: {}".format( supplier_framework['supplierId'], supplier_framework['agreementId'])) # Get their framework agreement framework_agreement = client.get_framework_agreement( supplier_framework['agreementId'])['agreement'] # Skip if they already have a path if framework_agreement.get('signedAgreementPath'): print("PATH ALREADY EXISTS: {}".format(
# 0) Store the current framework state so that it can be restored at the end. current_framework_state = data_api_client.get_framework( args.new_slug)['frameworks']['status'] # 1) Set a given (new) framework to open data_api_client._post_with_updated_by( url='{}/frameworks/{}'.format(data_api_url, args.new_slug), data={"frameworks": { "status": "open" }}, user=identity) # 2) Find some suppliers with services on a given (old) framework (e.g. g-cloud-7) suppliers = data_api_client.find_framework_suppliers( framework_slug=args.old_slug, with_declarations=None)['supplierFrameworks'] suppliers = [ x for x in suppliers if x['supplierId'] not in SUPPLIER_BLACKLIST ] suppliers = random.sample(suppliers, args.supplier_count) suppliers_prepared = set() # 3) Submit draft services through randomly selected suppliers. while services_generated < args.quantity: random_lot = random.choice(args.lots) random_supplier = random.choice(suppliers) supplier_id = random_supplier['supplierId'] if supplier_id not in suppliers_prepared: # 2a) For each supplier, register interest in a given (new) framework
class DNBDirectAPIClient(object):
    """A client utilising the D&B direct API with methods used by the DMp API."""

    def __init__(self, username, password, stage='production'):
        """Authenticate against D&B and remember the DMp stage.

        :param username: D&B user, sent in the ``x-dnb-user`` header
        :param password: D&B password, sent in the ``x-dnb-pwd`` header
        :param stage: DMp stage used when the data API client is created lazily
        """
        self.stage = stage
        self.data_api_client = None  # created on first use by get_dmp_supplier_data
        self.duns_number_compared = set()  # faster in operation than a list
        self.max_dnb_api_calls_per_method = 100
        self.root = 'https://direct.dnb.com'
        response = requests.post(f'{self.root}/Authentication/V2.0/', headers={
            'x-dnb-user': username,
            'x-dnb-pwd': password,
        })
        self.dnb_auth_token = response.json()['AuthenticationDetail']['Token']

    def get_dnb_org_data(self, duns_number):
        """Return the first D&B match candidate for the given DUNS number.

        :param duns_number: DUNS number to look up
        :returns: the first entry of ``MatchCandidate`` in the D&B response
        :raises DNBAPIException: when D&B answers with a result ID other than ``CM000``
        """
        response = requests.get(
            f'{self.root}/V6.0/organizations?match=true&MatchTypeText=Basic&DUNSNumber={duns_number}',
            headers={
                'Authorization': self.dnb_auth_token,
            })
        # Parse the body once instead of re-decoding it for every field.
        match_response = response.json()['MatchResponse']
        transaction_result = match_response['TransactionResult']

        # CM000 is the response code the D&B API returns when a request has been completed successfully.
        # For more information on the D&B API response codes see the link at the DNBAPIException docstring.
        if transaction_result['ResultID'] == 'CM000':
            return match_response['MatchResponseDetail']['MatchCandidate'][0]
        raise DNBAPIException(transaction_result['ResultID'], transaction_result['ResultText'])

    def extract_dnb_org_data(self, dnb_data):
        """Return the D&B fields that can be compared with DMp's suppliers.

        :param dnb_data: a D&B match candidate dictionary
        :returns: ``[name, company house number, country code, postcode, address]``;
            a field missing from ``dnb_data`` is returned as ``None``
        """
        primary_address = dnb_data.get('PrimaryAddress', {})
        # Only the first street address line is compared.
        first_street_line = next(iter(primary_address.get('StreetAddressLine', {})), {})
        return [
            # company name
            dnb_data.get('OrganizationPrimaryName', {}).get('OrganizationName', {}).get('$'),
            # UK company house number
            dnb_data.get('OrganizationIdentificationNumberDetail', {}).get('OrganizationIdentificationNumber'),
            # the 2 letter country code is in capital case of the primary address
            primary_address.get('CountryISOAlpha2Code'),
            # primary postal code
            primary_address.get('PostalCode'),
            # primary address
            first_street_line.get('LineText'),
        ]

    def get_dmp_supplier_data(self, framework=None, duns_number=None, from_declaration=False):
        """Return the raw DMp API response for a DUNS number or a framework.

        The data API client is created on first use. ``duns_number`` takes
        precedence over ``framework`` when both are given.

        :param framework: framework slug to list suppliers for
        :param duns_number: DUNS number to search suppliers by
        :param from_declaration: with ``framework``, use ``find_framework_suppliers``
            instead of ``find_suppliers``
        :returns: the API client's response, or ``None`` when neither
            ``duns_number`` nor ``framework`` is given
        """
        # TODO: error handling
        if self.data_api_client is None:
            self.data_api_client = DataAPIClient(
                base_url=get_api_endpoint_from_stage(self.stage),
                auth_token=get_auth_token('api', self.stage))

        if duns_number is not None:
            return self.data_api_client.find_suppliers(duns_number=duns_number)
        if framework is not None:
            # TODO: use iter instead -> digitalmarketplace-apiclient/blob/master/dmapiclient/data.py#L119
            # TODO: check pagination
            if from_declaration:
                return self.data_api_client.find_framework_suppliers(framework)
            return self.data_api_client.find_suppliers(framework=framework)
        # TODO: else raise error when duns or framework is None
        return None

    def extract_dmp_supplier_data(self, dmp_supplier_data, from_declaration=False):
        """Return the DMp fields that can be compared with D&B's organisations.

        :param dmp_supplier_data: a supplier (or supplier framework) dictionary
        :param from_declaration: read the values from the ``declaration``
            sub-dictionary instead of the supplier's top-level fields
        :returns: ``[name, company house number, country code, postcode, address]``;
            missing values are ``None`` (``''`` for the country code)
        """
        if from_declaration:
            declaration = dmp_supplier_data.get('declaration', {})
            return [
                # company name
                declaration.get('supplierRegisteredName'),
                # UK company house number
                declaration.get('supplierCompanyRegistrationNumber'),
                # the 2 letter country code; `or ''` also covers an explicit None value
                (declaration.get('supplierRegisteredCountry') or '').replace('country:', ''),
                # 1st postcode
                declaration.get('supplierRegisteredPostcode'),
                # 1st address
                declaration.get('supplierRegisteredBuilding'),
            ]

        # A supplier without any contact information yields None for postcode
        # and address instead of raising StopIteration.
        first_contact = next(iter(dmp_supplier_data.get('contactInformation') or []), {})
        return [
            # company name
            dmp_supplier_data.get('registeredName'),
            # UK company house number
            dmp_supplier_data.get('companiesHouseNumber'),
            # the 2 letter country code; `or ''` also covers an explicit None value
            (dmp_supplier_data.get('registrationCountry') or '').replace('country:', ''),
            # 1st postcode
            first_contact.get('postcode'),
            # 1st address
            first_contact.get('address1'),
        ]

    @staticmethod
    def _fuzzy_ratio(dmp_value, dnb_value):
        """Return the case-insensitive Levenshtein ratio, or 0. if either value is None."""
        if dmp_value is None or dnb_value is None:
            return 0.
        return levenshtein_ratio_and_distance(dmp_value.lower(), dnb_value.lower(), ratio_calc=True)

    def compare_data(self, duns_number, dmp, dnb):
        """
        return a list of similarity ratios between the two APIs data
        [DUNS number, Compared at, exists in D&B, Name, House Nr, Country, Postcode, Address, D&B error]

        :param dmp: list as produced by ``extract_dmp_supplier_data``
        :param dnb: list as produced by ``extract_dnb_org_data``; a falsy value
            means there is nothing to compare and every score is 0.
        """
        if not dnb:
            return [
                duns_number,
                datetime.datetime.now(),
            ] + [0.] * 7

        return [
            duns_number,
            # timestamp
            datetime.datetime.now(),
            # does DUNS number exists in the D&B database
            1.,
            # fuzzy matching of the company's name
            self._fuzzy_ratio(dmp[0], dnb[0]),
            # house nr exact match
            1. if dmp[1] is not None and dnb[1] is not None and dmp[1].lower() == dnb[1].lower() else 0.,
            # country exact match (an empty or None country never matches)
            1. if dmp[2] and dnb[2] is not None and dmp[2].lower() == dnb[2].lower() else 0.,
            # postcode fuzzy match
            self._fuzzy_ratio(dmp[3], dnb[3]),
            # fuzzy matching of the company's address
            self._fuzzy_ratio(dmp[4], dnb[4]),
            '',  # blank line for D&B API errors
        ]

    def fetch_and_compare_suppliers_data(self, duns_number):
        """Compare one supplier's DMp entry with D&B, identified by its DUNS number.

        :returns: a row as built by ``compare_data``; when D&B reports an error
            all scores are 0. and the last column holds the failure reason
        """
        dmp_response = self.get_dmp_supplier_data(duns_number=duns_number)
        # TODO: if dmp_response.get('dunsNumber') is None: exit
        # TODO: if len(dmp_data['suppliers']) != 1 raise an error
        dmp = self.extract_dmp_supplier_data(dmp_response['suppliers'][0])
        try:
            dnb = self.extract_dnb_org_data(self.get_dnb_org_data(duns_number))
            return self.compare_data(duns_number, dmp, dnb)
        except DNBAPIException as e:
            return [
                # first column is the DUNS number, as in every other row
                duns_number,
                datetime.datetime.now(),  # timestamp
                0.,  # is in D&B
                0.,  # company's name
                0.,  # company house
                0.,  # country exact match
                0.,  # postcode exact match
                0.,  # company's address
                f'{e.args[0]}: {e.args[1]}',  # D&B failure reason
            ]

    def compare_suppliers(self, dmp_supplier, from_declaration, export_dnb_data, export_dmp_data, counter,
                          writer=None):
        """Compare a single DMp supplier with D&B and write or print the result row.

        Suppliers without a DUNS number, or whose DUNS number is already in
        ``self.duns_number_compared``, are skipped without calling D&B.

        :param dmp_supplier: supplier (or supplier framework) dictionary from DMp
        :param from_declaration: read the DMp values from the declaration
        :param export_dnb_data: append the extracted D&B values to the row
        :param export_dmp_data: append the extracted DMp values to the row
        :param counter: number of rows written so far
        :param writer: CSV writer; when ``None`` the row is printed instead
        :returns: the updated counter. Integers are passed by value, so the
            caller has to use the return value to enforce
            ``max_dnb_api_calls_per_method``.
        """
        if from_declaration:
            duns_number = dmp_supplier.get('declaration', {}).get('supplierDunsNumber')
        else:
            duns_number = dmp_supplier.get('dunsNumber')

        # don't compare if DMp has no DUNS for a supplier
        if duns_number is None:
            print('NO DUNS NUMBER FOR', self.extract_dmp_supplier_data(
                dmp_supplier, from_declaration=from_declaration)
            )  # TODO: raise/log error?
            return counter

        # excluding existing supplier of the CSV write or print the rest
        if duns_number in self.duns_number_compared:
            return counter

        dmp_data = self.extract_dmp_supplier_data(dmp_supplier, from_declaration=from_declaration)
        try:
            dnb_data = self.extract_dnb_org_data(self.get_dnb_org_data(duns_number))
            row = self.compare_data(duns_number, dmp_data, dnb_data)
            if export_dnb_data:
                row += dnb_data
            if export_dmp_data:
                row += dmp_data
        except DNBAPIException as e:
            row = [
                duns_number,
                datetime.datetime.now(),  # timestamp
                0.,  # is in D&B
                0.,  # company's name
                0.,  # company house
                0.,  # country exact match
                0.,  # postcode exact match
                0.,  # company's address
                f'{e.code}: {e.text}',  # D&B failure reason
            ]
            if export_dnb_data:
                row += [None] * 5  # keep the D&B columns aligned
            if export_dmp_data:
                row += dmp_data

        if writer is None:
            print(row)  # TODO: log maybe?
            return counter

        counter += 1
        print(duns_number)
        writer.writerow(row)  # TODO: extend with D&B data
        self.duns_number_compared.add(duns_number)  # add DUNS to set
        return counter

    def fetch_and_compare_frameworks_suppliers_data(
            self,
            frameworks=('g-cloud', 'digital-outcomes-and-specialists'),
            csv_filename=None,
            export_dnb_data=True,
            export_dmp_data=False,
            from_declaration=True):
        """Retrieve DMp suppliers of frameworks and compare each with D&B.

        With ``csv_filename`` the DUNS numbers already present in that file are
        skipped and new rows are appended; the file is created with a header
        when it does not exist. At most ``self.max_dnb_api_calls_per_method``
        rows are written per call. Without ``csv_filename`` every row is
        printed and no limit applies.

        :param frameworks: iterable of framework slugs
        :param csv_filename: optional path of the CSV file to resume/append to
        :param export_dnb_data: append the extracted D&B values to each row
        :param export_dmp_data: append the extracted DMp values to each row
        :param from_declaration: take the DMp values from supplier declarations
        """
        counter = 0
        csv_file = None
        writer = None  # stays None when printing instead of writing a CSV

        # fetch all DUNS numbers from CSV and if it doesn't exist create it
        if csv_filename is not None:
            try:
                with open(csv_filename, 'rb') as f:
                    reader = unicodecsv.reader(f, encoding='utf-8')
                    next(reader, None)  # skip the CSV's header (tolerates an empty file)
                    for row in reader:
                        print(row)
                        if row:  # ignore blank lines
                            self.duns_number_compared.add(row[0])
                    print(self.duns_number_compared)
            except FileNotFoundError:
                header = [
                    'DUNS number',
                    'Compared at',
                    'In D&B?',
                    'Name',
                    'Company House',
                    'Country',
                    'Postcode',
                    'Address',
                    'D&B Error',
                ]
                if export_dnb_data:
                    header += [
                        'D&B Name',
                        'D&B Company House',
                        'D&B Country',
                        'D&B Postcode',
                        'D&B Address',
                    ]
                if export_dmp_data:
                    header += [
                        'DMp Name',
                        'DMp Company House',
                        'DMp Country',
                        'DMp Postcode',
                        'DMp Address',
                    ]
                with open(csv_filename, 'wb') as f:
                    unicodecsv.writer(f, encoding='utf-8').writerow(header)
            # open for write later; closed in the ``finally`` below
            csv_file = open(csv_filename, 'ab')

        try:
            if csv_file is not None:
                writer = unicodecsv.writer(csv_file, encoding='utf-8')

            # iterate all the suppliers of the frameworks
            for framework in frameworks:
                dmp_response = self.get_dmp_supplier_data(
                    framework=framework, from_declaration=from_declaration)
                response_key = 'supplierFrameworks' if from_declaration else 'suppliers'
                for dmp_supplier in dmp_response.get(response_key, []):
                    counter = self.compare_suppliers(
                        dmp_supplier, from_declaration, export_dnb_data, export_dmp_data, counter, writer)
                    if writer is not None and counter >= self.max_dnb_api_calls_per_method:
                        return
        finally:
            # close file resource if necessary, also when a comparison raised
            if csv_file is not None:
                csv_file.close()