Exemple #1
0
    def _get_officers(self, data, officer):
        """Collect the officers linked from ``data`` into ``self.officers``.

        Every raw officer dict found under the ``officers`` link is patched in
        place with the extra keys set below (``appointments``, ``title``,
        ``address_snippet``, ``matches``), wrapped in
        ``CompaniesHouseOfficer`` and the wrapper's ``data`` is appended to
        ``self.officers``.

        Args:
            data: record handed to ``getlink`` to resolve its ``officers``
                link.
            officer: value handed to ``match_significant_to_self`` so each
                listed officer can be compared against it.
        """
        officer_dicts = getlink(data, 'officers')['items']
        for off in officer_dicts:

            # Names are split on the comma and rebuilt as 'last-part
            # first-part' (presumably 'SURNAME, Forenames' -> 'Forenames
            # Surname'). Parts are stripped so the blank after the comma does
            # not end up at the front of the title, and a name without any
            # comma is used once instead of being duplicated.
            parts = [part.strip() for part in off['name'].split(',')]
            if len(parts) > 1:
                name = '%s %s' % (parts[-1].title(), parts[0].title())
            else:
                name = parts[0].title()

            # setting the appointments to [] ensures we don't recurse all
            # through the companieshouse db
            off['appointments'] = []
            off['title'] = name
            # tolerate records that carry no address at all
            off['address_snippet'] = ' '.join(off.get('address', {}).values())
            off['matches'] = {}

            x = CompaniesHouseOfficer(off)
            x.isOfficer = bool(
                self.match_significant_to_self(off,
                                               officer,
                                               fuzzy_threshold=65,
                                               count_threshold=1))

            self.officers.append(x.data)
Exemple #2
0
    def get_company_officers(self, company):
        """Return the officer records linked from a company record.

        Args:
            company: company dict; its ``links`` mapping is checked for an
                ``officers`` entry.

        Returns:
            The ``items`` list of the resolved ``officers`` link, or an empty
            list when the company has no such link (or no ``links`` at all).
        """
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        if 'officers' not in company.get('links', {}):
            return []

        return getlink(company, 'officers')['items']
Exemple #3
0
    def get_company_persons(self, company):
        """Return the persons with significant control linked from a company.

        Args:
            company: company dict; its ``links`` mapping is checked for a
                ``persons_with_significant_control`` entry.

        Returns:
            The ``items`` list of the resolved link, or an empty list when the
            company has no such link (or no ``links`` at all).
        """
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        if 'persons_with_significant_control' not in company.get('links', {}):
            return []

        return getlink(company, 'persons_with_significant_control')['items']
Exemple #4
0
    def _get_company(self, data, get_officers, get_filing, get_persons):
        """Resolve the ``company`` link of ``data`` and store it on ``self``.

        The linked record is wrapped in ``CompaniesHouseCompany`` (together
        with ``self._officer`` and the three ``get_*`` arguments, which are
        passed through untouched). The wrapper's name, status and number are
        copied onto ``self`` and its ``data`` is kept as ``self.company``.
        """
        raw_company = getlink(data, 'company')
        wrapper = CompaniesHouseCompany(raw_company, self._officer,
                                        get_officers, get_filing, get_persons)

        # mirror the headline fields of the wrapper on this object
        for field in ('company_name', 'company_status', 'company_number'):
            setattr(self, field, getattr(wrapper, field))

        self.company = wrapper.data
Exemple #5
0
    def _get_persons(self, data, officer):
        """Collect persons with significant control into ``self.persons``.

        Each entry under the ``persons_with_significant_control`` link of
        ``data`` is wrapped in ``CompaniesHousePerson``; the wrapper is
        flagged as an officer when ``match_significant_to_self`` accepts the
        entry for ``officer``, and its ``data`` is appended to
        ``self.persons``.
        """
        response = getlink(data, 'persons_with_significant_control')

        for entry in response['items']:
            wrapped = CompaniesHousePerson(entry)

            # the flag is only ever raised here, never lowered
            is_match = self.match_significant_to_self(entry, officer)
            if is_match:
                wrapped.isOfficer = True

            self.persons.append(wrapped.data)
Exemple #6
0
    def __init__(self, item_id, category_id, raw_string, pretty, registered,
                 amount, company, link):
        """
        Build an OtherShareholdingsItem.

        The generic fields are handed to ``Item.__init__``. The ``self`` link
        of ``company`` is then resolved and stored along with the ``items``
        of its persons-with-significant-control and officers links. When the
        resolved company equals the empty ``{'items': []}`` result, a
        placeholder record built from ``pretty`` is stored instead.
        """
        Item.__init__(self, item_id, category_id, raw_string, pretty,
                      registered, amount)

        self.link = link
        self.isWealth = True

        resolved = getlink(company, 'self')
        self.company = resolved
        self.persons = getlink(
            resolved, 'persons_with_significant_control')['items']
        self.officers = getlink(resolved, 'officers')['items']

        # nothing came back for the company: fall back to a stub record
        if resolved == {'items': []}:
            self.company = {
                'company_name': pretty,
                'company_number': 'N/A',
                'company_status': 'N/A'
            }
Exemple #7
0
    def identify_company(self, keywords, month, year, first, middle, last,
                         display):
        """Fill ``self.matched_companies`` with companies that mention a name.

        Every record in ``self.data`` is tested with
        ``filter_by_name_string`` against the candidate spellings of the
        name: "first last", the display name and, only when a middle name is
        given, "first middle last". For each record that matches at least one
        spelling, the record's ``self`` link is resolved and the result is
        appended to ``self.matched_companies`` (reset to ``[]`` on entry).

        Args:
            keywords, month, year: accepted for interface compatibility; not
                used by this method.
            first, middle, last: name parts; ``middle`` may be ``''``.
            display: display form of the name.
        """
        self.matched_companies = []

        # The middle-name spelling is only a candidate when a middle name
        # exists; otherwise it would degrade to "first  last" with a double
        # space.
        candidate_names = ['%s %s' % (first, last), display]
        if middle != '':
            candidate_names.append('%s %s %s' % (first, middle, last))

        for record in self.data:
            # self.data holds search results, not the actual companies, so
            # the company record is only fetched for results that match the
            # name. A name counts once however many spellings match.
            name_found = any(
                filter_by_name_string(record, candidate) != []
                for candidate in candidate_names)

            if name_found:
                self.matched_companies.append(getlink(record, 'self'))
Exemple #8
0
def _strip_honorifics(name, honorifics):
    """Return ``name`` without the words listed in ``honorifics``, trimmed."""
    kept = [word for word in name.split(' ') if word not in honorifics]
    return ' '.join(kept).strip()


def _normalise_officer_name(officer_name, surname_re):
    """Turn an officer name of the form 'LAST, First' into 'first last'.

    Names that ``surname_re`` does not recognise are only lower-cased.
    """
    found = surname_re.search(officer_name)
    if found is None:
        return officer_name.lower()

    surname_part = found.group()
    forenames = officer_name.split(surname_part)[-1]
    return ('%s %s' % (forenames, surname_part.split(',')[0])).lower()


def check_match(i, company_search_string, month, year, first, middle, last,
                display):
    """Decide whether search result ``i`` is the company being looked for.

    The checks run in this order and the first success wins:

        - the search string exactly matches the company title
          (case-insensitively)
        - every word of the search string was found in the company title
          (one miss is tolerated when there are more than five words)
        - a previous company name matches the search string
        - a person with significant control matches the display name
        - an officer (who may also be a shareholder) matches the display
          name, or carries both the first and the last name

    Companies House records are neither consistent nor complete, so no
    second value such as date of birth or address is verified here.

    Args:
        i: search result dict with a ``title`` and, optionally, a
            ``matches`` dict whose ``title`` entry is a flat list of
            start/end offsets.
        company_search_string: the company name that was searched for.
        month, year: accepted for interface compatibility; not used.
        first, middle, last: name parts of the person; ``middle`` is not
            needed because first + last already decides the officer check.
        display: display name of the person.

    Returns:
        ``True`` when one of the checks succeeds, otherwise ``False``.
    """
    title = i['title']
    search_lower = company_search_string.lower()

    # remove ltd and limited from the search string, companies house doesn't
    # match against them
    search_clean = search_lower.replace('ltd', '').replace('limited',
                                                           '').strip()

    # 1. exact title match; both sides are lower-cased so a search string
    #    containing capitals can match too
    if title.lower() == search_lower:
        return True

    # 2. if the number of words searched for equals the number of words
    #    found in the title, the title has got all the words
    if 'matches' in i and 'title' in i['matches']:
        offsets = i['matches']['title']
        # pair up (start, end); an unpaired trailing offset is ignored
        spans = list(zip(offsets[0::2], offsets[1::2]))
        number_of_search_words = len(search_clean.split(' '))

        if number_of_search_words == len(spans):
            return True

        # with more than 5 search words, allow for one missing
        if number_of_search_words > 5 and \
                number_of_search_words - 1 == len(spans):
            return True

    # 3. no title match: fetch the company record and check its previous
    #    names, maybe the company has changed name
    company = getlink(i, 'self')
    if 'previous_company_names' in company:
        for previous in company['previous_company_names']:
            if previous['name'].lower() == search_lower:
                return True

    honorifics = ['mr', 'mrs', 'ms', 'miss', 'sir', 'lady', 'dr', 'rt', 'hon']
    display_lower = display.lower()

    # 4. persons with significant control: compare against the person's
    #    display name, NOT the search string
    persons = getlink(company, 'persons_with_significant_control')['items']
    for person in persons:
        person_string = _strip_honorifics(person['name'].lower(), honorifics)
        if person_string == display_lower:
            return True

    # 5. officers
    surname_re = re.compile('[A-Z]+, ')
    display_words = display_lower.split(' ')
    first_lower = (first or '').lower()
    last_lower = (last or '').lower()

    officers = getlink(company, 'officers')['items']
    for officer in officers:
        officer_name = _normalise_officer_name(officer['name'], surname_re)
        officer_string = _strip_honorifics(officer_name, honorifics)

        if officer_string == display_lower:
            return True

        officer_words = officer_string.split(' ')

        # if all the display names are in the officer name, match that
        if all(word in officer_words for word in display_words):
            return True

        # first + last present is enough; an additional middle-name test
        # could never change the outcome
        if first_lower and last_lower and \
                first_lower in officer_words and last_lower in officer_words:
            return True

    return False
Exemple #9
0
    def lookup(self):
        """Resolve this donor to Companies House data or to a known entity.

        Reads ``self.donor``, ``self.status`` and ``self.address`` and sets
        ``self.company``, ``self.persons``, ``self.officers``, ``self.link``
        and ``self.appointments`` (plus ``self.type`` on the branches that
        classify the donor). The steps are:

        1. hard-coded donor/status corrections;
        2. the ``urls`` and ``people`` patch tables;
        3. a ``registration <number>`` fragment in the status text;
        4. with a company number: fetch that company directly;
        5. without one: individuals get their appointments, known entity
           kinds are renamed through their lookup table, and anything else
           goes through a fuzzy name + address company search.

        Donors that could not be resolved are reported on stdout.
        """
        self.donor = str(self.donor)
        company_number = None
        people_links = []
        found = False

        # ugly hack corrections
        if self.donor in ['Tresco Estate', 'James Hay', 'Think BDW Ltd']:
            self.status = 'company'

        if self.status == 'company, no 10120655':
            company_number = 10120655

        if 'Armed Forces Parliamentary Trust' == self.donor:
            self.status = 'other'
        if u'Buck’s Club 1919' in self.donor:
            self.donor = "Buck's Club 1919"
            self.status = 'members'
        if u'Pratt’s Club' in self.donor:
            self.donor = "Pratt's Club"
            self.status = 'members'
        if 'carlton club' in self.donor.lower():
            self.donor = 'Carlton Club'
            self.status = 'members'
        if 'National Liberal Club' in self.donor:
            self.donor = 'National Liberal Club'
            self.status = 'members'
        if 'The Public Interest Foundation (UK charity)' == self.donor:
            self.status = 'charity'

        # apply patches
        if self.donor in urls:
            company_number = urls[self.donor].split('/')[-1]

        if self.donor in people:
            people_links = people[self.donor]

        if not company_number:
            # use the supplied company number from the register of interests
            company_number_search = re.search(r'registration [0-9a-zA-Z]+',
                                              self.status)
            if company_number_search:
                company_number = company_number_search.group().split(
                    'registration ')[-1]

                # purely numeric numbers are padded to 8 digits; numbers that
                # contain letters are left alone (int() would raise on them)
                if company_number.isdigit():
                    company_number = '%08d' % (int(company_number))

        self.company = {
            'company_name': self.donor,
            'company_number': 'N/A',
            'company_status': 'Active'
        }
        self.persons = []
        self.officers = []
        self.link = None
        self.appointments = []

        # the status is final from here on, so lower-case it once
        status_lower = self.status.lower()

        if company_number:

            # we have a company number, no need to search for it
            self.company = getlink(
                {'links': {
                    'self': '/company/%s' % str(company_number)
                }}, 'self')
            persons = getlink(self.company, 'persons_with_significant_control')
            self.persons = persons['items']
            officers = getlink(self.company, 'officers')
            self.officers = officers['items']

            if 'errors' not in self.company:
                self.link = 'https://beta.companieshouse.gov.uk' + self.company[
                    'links']['self']
                found = True
            else:
                self.company = {
                    'company_name': self.donor,
                    'company_number': 'N/A',
                    'company_status': 'Active'
                }
                self.link = ''

        elif 'individual' in status_lower or 'private' in status_lower:
            # for individuals, we store the appointments, then the company,
            # officers etc as children of the appointment
            if people_links:

                for pl in people_links:
                    bit = pl.split('https://beta.companieshouse.gov.uk')[-1]
                    appointments = getlink({'links': {
                        'self': '%s' % bit
                    }}, 'self')
                    for i in appointments['items']:
                        if i not in self.appointments:
                            self.appointments.append(i)

                # just take the last one
                self.link = people_links[-1]
                found = True

            for app in self.appointments:
                # add the company, officers and persons record to the
                # appointment record
                app['company'] = getlink(app, 'company')
                app['officers'] = getlink(app['company'], 'officers')['items']
                app['persons_with_significant_control'] = getlink(
                    app['company'],
                    'persons_with_significant_control')['items']

        else:
            # everything below here should generate a company / entity.
            # (status keyword, entity type, table of canonical donor names);
            # the first keyword found in the status wins, so order matters.
            known_entities = (
                ('trade', 'union', trade_union),
                ('charity', 'charity', charities),
                ('unincorporated', 'club', clubs),
                ('members', 'club', clubs),
                ('friendly', 'club', clubs),
                ('other', 'other', others),
                ('trust', 'other', others),
                ('provident', 'company', others),
            )

            for keyword, entity_type, table in known_entities:
                if keyword not in status_lower:
                    continue
                self.type = entity_type
                if self.donor in table:
                    self.donor = table[self.donor]
                    found = True
                break
            else:
                # reached only when no keyword above matched
                # NOTE(review): unlike the keywords above, 'visit' is tested
                # against the status as written (case-sensitive) — confirm.
                if 'visit' in self.status:
                    # TODO
                    self.type = 'visit'
                else:
                    # we don't have a company number, so do a company search
                    if 'llp' in status_lower or 'limited' in status_lower:
                        self.type = 'company'
                    else:
                        self.type = 'other'

                    companies = CompaniesHouseCompanySearch([self.donor])

                    for i in companies.data:
                        # we need the name and address to fuzzy match
                        name_ratio = fuzz.token_set_ratio(i['title'].lower(),
                                                          self.donor)
                        if name_ratio <= 90:
                            continue

                        address_snippet = i.get('address_snippet')
                        if not address_snippet:
                            continue

                        addr_ratio = fuzz.token_set_ratio(
                            address_snippet.lower(), self.address)
                        if addr_ratio <= 90:
                            continue

                        # name and address both match well enough
                        self.link = 'https://beta.companieshouse.gov.uk' + i[
                            'links']['self']
                        self.company = getlink(i, 'self')
                        persons = getlink(self.company,
                                          'persons_with_significant_control')
                        self.persons = persons['items']
                        officers = getlink(self.company, 'officers')
                        self.officers = officers['items']
                        found = True
                        break

        if not found:
            # single parenthesised argument: valid on Python 2 and Python 3
            print('\tMISSING %s: %s' % (self.status.upper(), self.donor))
Exemple #10
0
    def _get_filing_history(self, data):
        """Append the company's filing records to ``self.filing``.

        Each entry under the ``filing_history`` link of ``data`` is wrapped
        in ``CompaniesHouseFiling`` and the wrapper's ``data`` is stored.
        """
        history = getlink(data, 'filing_history')
        self.filing.extend(
            CompaniesHouseFiling(entry).data for entry in history['items'])
Exemple #11
0
 def _get_appointments(self, record):
     """Resolve and return the ``self`` link of the found officer record."""
     appointments = getlink(record, 'self')
     return appointments
    def do_logic(self):
        """Append a ``ShareholdingsItem`` for every company tied to the name.

        Two separate searches are run with ``self.names``:

        - a company search; its matched companies are added only when the
          search also reported matched officers or matched persons;
        - a user (officer) search; the company of every appointment of every
          matched user is added.

        Company records that contain an ``errors`` key are skipped. Results
        are appended to ``self.items``.
        """
        next_id = len(self.items) + 1
        item_id = '%04d' % next_id
        # NOTE(review): the id is computed once, so every item appended by
        # this call carries the same item_id — confirm that is intended.

        companies = CompaniesHouseCompanySearch(self.names)
        companies.get_data(keywords=KEYWORDS,
                           month=self.month,
                           year=self.year,
                           first=self.first,
                           middle=self.middle,
                           last=self.last,
                           display=self.display)

        if companies.matched_officers or companies.matched_persons:
            for i in companies.matched_companies:
                self._append_shareholding(getlink(i, 'self'), item_id)

        users = CompaniesHouseUserSearch(self.names)
        users.identify(keywords=KEYWORDS,
                       month=self.month,
                       year=self.year,
                       first=self.first,
                       middle=self.middle,
                       last=self.last,
                       display=self.display)

        for i in users.matched:
            for app in i['appointments']:
                self._append_shareholding(getlink(app, 'company'), item_id)

    def _append_shareholding(self, company, item_id):
        """Append a ShareholdingsItem for ``company`` unless it has errors."""
        # `in` replaces dict.has_key(), which no longer exists on Python 3.
        if 'errors' in company:
            return

        raw_string = ' '.join(self.names)
        pretty = self.display.title()
        registered = ''
        amount = 0
        url = base_url + company['links']['self']

        self.items.append(
            ShareholdingsItem(item_id, self.category_id, raw_string, pretty,
                              registered, amount, company, url))