def update_trustees(session,
                    charity_collection,
                    lower_limit,
                    upper_limit,
                    batch_size=10000):
    """Write each charity's trustee names to ``trustees.names``.

    Registration numbers are processed in windows of ``batch_size``. A
    window ``[lower_limit, lower_limit + batch_size)`` is processed for as
    long as its start is ``<= upper_limit``; the final window is not
    clamped, so it may reach past ``upper_limit``.

    Args:
        session: Object exposing ``query(...).join(...).filter(...)`` and
            yielding rows when iterated (presumably a SQLAlchemy session).
        charity_collection: Object exposing ``bulk_write(requests)``
            (presumably a pymongo collection).
        lower_limit: First registration number of the first window.
        upper_limit: Windows are started while their first registration
            number is less than or equal to this value.
        batch_size: Width of each registration-number window.

    Raises:
        ValueError: If ``batch_size`` is not positive, since the window
            would never advance past ``upper_limit``.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    logging.info("Updating trustees")
    while lower_limit <= upper_limit:
        rows = (
            session
            .query(ExtractMainCharity.regno, ExtractTrustee)
            .join(ExtractTrustee,
                  ExtractTrustee.regno == ExtractMainCharity.regno)
            .filter(ExtractMainCharity.regno >= lower_limit)
            .filter(ExtractMainCharity.regno < lower_limit + batch_size)
        )

        # Group trustee names by the registration number of their charity.
        trustees = {}
        for row in rows:
            trustee = row.ExtractTrustee
            trustees.setdefault(trustee.regno, []).append(
                stripped_or_none(trustee.trustee, 'title'))

        requests = [
            UpdateOne({'ids.GB-CHC': regno},
                      {'$set': {'trustees.names': names}})
            for regno, names in trustees.items()
        ]

        logging.info(lower_limit)

        # Advance before the early ``continue`` so an empty window cannot
        # stall the loop.
        lower_limit += batch_size

        if not requests:
            continue

        try:
            charity_collection.bulk_write(requests)
        except BulkWriteError as bwe:
            # Failures are logged and the remaining windows still run.
            logging.error(bwe.details)
def update_beneficiaries(session, charity_collection, lower_limit, upper_limit, batch_size=10000):
    """Write each charity's beneficiary classifications to ``beneficiaries``.

    Only classification rows with ``200 <= classno < 300`` are used.
    Registration numbers are processed in windows of ``batch_size``. A
    window ``[lower_limit, lower_limit + batch_size)`` is processed for as
    long as its start is ``<= upper_limit``; the final window is not
    clamped, so it may reach past ``upper_limit``.

    Args:
        session: Object exposing ``query(...).join(...).filter(...)`` and
            yielding rows when iterated (presumably a SQLAlchemy session).
        charity_collection: Object exposing ``bulk_write(requests)``
            (presumably a pymongo collection).
        lower_limit: First registration number of the first window.
        upper_limit: Windows are started while their first registration
            number is less than or equal to this value.
        batch_size: Width of each registration-number window.

    Raises:
        ValueError: If ``batch_size`` is not positive, since the window
            would never advance past ``upper_limit``.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    logging.info("Updating beneficiaries.")
    while lower_limit <= upper_limit:
        rows = (
            session
            .query(ExtractMainCharity.regno, ExtractClass.id, ExtractClassRef)
            .join(ExtractClass, ExtractClass.regno == ExtractMainCharity.regno)
            .join(ExtractClassRef,
                  ExtractClassRef.classno == ExtractClass.classno)
            .filter(ExtractClass.classno >= 200)
            .filter(ExtractClass.classno < 300)
            .filter(ExtractMainCharity.regno >= lower_limit)
            .filter(ExtractMainCharity.regno < lower_limit + batch_size)
        )

        # Group the classification entries by charity registration number.
        beneficiaries = {}
        for row in rows:
            class_ref = row.ExtractClassRef
            beneficiaries.setdefault(row.regno, []).append({
                'id': int(class_ref.classno),
                'name': stripped_or_none(class_ref.classtext, 'sentence'),
            })

        requests = [
            UpdateOne({'ids.GB-CHC': regno},
                      {'$set': {'beneficiaries': entries}})
            for regno, entries in beneficiaries.items()
        ]

        logging.info(lower_limit)

        # Advance before the early ``continue`` so an empty window cannot
        # stall the loop.
        lower_limit += batch_size

        if not requests:
            continue

        try:
            charity_collection.bulk_write(requests)
        except BulkWriteError as bwe:
            # Failures are logged and the remaining windows still run.
            logging.error(bwe.details)
def create_subsidiary(charity):
    """Build the subsidiary document for one charity row.

    Args:
        charity: Object with the attributes ``subno``, ``name``,
            ``orgtype``, ``gd``, ``aob``, ``corr``, ``phone``, ``postcode``
            and ``add1`` to ``add5``.

    Returns:
        dict: Keys ``id``, ``name``, ``isRegistered``, ``governingDoc``,
        ``areaOfBenefit`` and ``contact``. ``contact.email`` is always
        ``None``. ``isRegistered`` is always a ``bool``; it is ``True`` only
        when the stripped ``orgtype`` equals ``'R'``.
    """
    address_fields = ('add1', 'add2', 'add3', 'add4', 'add5')

    return {
        'id': charity.subno,
        'name': stripped_or_none(charity.name, 'title'),
        # ``orgtype and ...`` would leak None or '' into this boolean field
        # when orgtype is missing or empty, so normalise the value first.
        'isRegistered': (charity.orgtype or '').strip() == 'R',
        'governingDoc': stripped_or_none(charity.gd),
        'areaOfBenefit': stripped_or_none(charity.aob),
        'contact': {
            'email': None,
            'person': stripped_or_none(charity.corr, 'title'),
            'phone': stripped_or_none(charity.phone),
            'postcode': stripped_or_none(charity.postcode),
            # Address lines that are None are dropped rather than stored.
            'address': [
                stripped_or_none(getattr(charity, field), 'title')
                for field in address_fields
                if getattr(charity, field) is not None
            ],
        },
    }
def create_charity(charity, main_charity):
    """Build the base charity document from its two source rows.

    List-valued fields that are filled in by other steps
    (``areasOfOperation``, ``causes``, ``beneficiaries``, ``operations``,
    ``subsidiaries``, ``alternativeNames``, ``trustees.names`` and
    ``income.annual``) start out empty, and ``activities`` starts as
    ``None``.

    Args:
        charity: Object with the attributes ``regno``, ``name``,
            ``orgtype``, ``gd``, ``aob``, ``corr``, ``phone``, ``postcode``
            and ``add1`` to ``add5``.
        main_charity: Object with the attributes ``email``, ``welsh``,
            ``trustees``, ``web``, ``grouptype``, ``incomedate``,
            ``income``, ``fyend`` and ``coyno``.

    Returns:
        dict: The charity document. ``isRegistered`` is always a ``bool``;
        it is ``True`` only when the stripped ``orgtype`` equals ``'R'``.
    """
    address_fields = ('add1', 'add2', 'add3', 'add4', 'add5')

    return {
        'regulator': 'GB-CHC',
        'ids': {
            'charityId': 'GB-CHC-%d' % charity.regno,
            'GB-CHC': charity.regno,
        },
        'name': stripped_or_none(charity.name, 'title'),
        # ``orgtype and ...`` would leak None or '' into this boolean field
        # when orgtype is missing or empty, so normalise the value first.
        'isRegistered': (charity.orgtype or '').strip() == 'R',
        'governingDoc': stripped_or_none(charity.gd),
        'areaOfBenefit': stripped_or_none(charity.aob, 'title'),
        'contact': {
            'email': stripped_or_none(main_charity.email, 'lower'),
            'person': stripped_or_none(charity.corr, 'title'),
            'phone': stripped_or_none(charity.phone),
            'postcode': stripped_or_none(charity.postcode),
            # Address lines that are None are dropped rather than stored.
            'address': [
                stripped_or_none(getattr(charity, field), 'title')
                for field in address_fields
                if getattr(charity, field) is not None
            ],
        },
        'isWelsh': boolean_on_value(main_charity.welsh, 'T'),
        'trustees': {
            'incorporated': boolean_on_value(main_charity.trustees, 'T'),
            'names': [],
        },
        'website': stripped_or_none(main_charity.web, 'lower'),
        'isSchool': boolean_on_value(main_charity.grouptype, 'SCH'),
        'income': {
            'latest': {
                'date': main_charity.incomedate,
                'total': (int(main_charity.income)
                          if main_charity.income is not None else None),
            },
            'annual': [],
        },
        'fyend': stripped_or_none(main_charity.fyend),
        'companiesHouseNumber': stripped_or_none(main_charity.coyno),
        'areasOfOperation': [],
        'causes': [],
        'beneficiaries': [],
        'operations': [],
        'subsidiaries': [],
        'alternativeNames': [],
        'activities': None,
    }
# Example #5
0
def update_aoo(session,
               charity_collection,
               lower_limit,
               upper_limit,
               batch_size=10000):
    """Write each charity's areas of operation to ``areasOfOperation``.

    Registration numbers are processed in windows of ``batch_size``. A
    window ``[lower_limit, lower_limit + batch_size)`` is processed for as
    long as its start is ``<= upper_limit``; the final window is not
    clamped, so it may reach past ``upper_limit``.

    Args:
        session: Object exposing ``query(...).join(...).filter(...)`` and
            yielding rows when iterated (presumably a SQLAlchemy session).
        charity_collection: Object exposing ``bulk_write(requests)``
            (presumably a pymongo collection).
        lower_limit: First registration number of the first window.
        upper_limit: Windows are started while their first registration
            number is less than or equal to this value.
        batch_size: Width of each registration-number window.

    Raises:
        ValueError: If ``batch_size`` is not positive, since the window
            would never advance past ``upper_limit``.
        KeyError: If a row's stripped ``aootype`` is not one of ``'A'`` to
            ``'E'``.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Loop-invariant lookup tables, built once instead of once per row.
    scale = {
        'A': 'UK Division',
        'B': 'Local Authority',
        'C': 'Metropolitan County',
        'D': 'Country',
        'E': 'Continent',
    }
    types_with_parent = ('B', 'D')

    logging.info("Updating areas of operation.")
    while lower_limit <= upper_limit:
        rows = (
            session
            .query(ExtractMainCharity.regno, ExtractCharityAoo.id,
                   ExtractAooRef)
            .join(ExtractCharityAoo,
                  ExtractCharityAoo.regno == ExtractMainCharity.regno)
            .join(ExtractAooRef,
                  and_(ExtractAooRef.aootype == ExtractCharityAoo.aootype,
                       ExtractAooRef.aookey == ExtractCharityAoo.aookey))
            .filter(ExtractMainCharity.regno >= lower_limit)
            .filter(ExtractMainCharity.regno < lower_limit + batch_size)
        )

        # Group the area entries by charity registration number.
        aoo = {}
        for row in rows:
            ref = row.ExtractAooRef
            aootype = stripped_or_none(ref.aootype)

            # The parent's type letter is the one after the child's:
            # 'B' rows point at a 'C' parent and 'D' rows at an 'E' parent.
            parent_aoo_id = None
            if aootype in types_with_parent and ref.master and ref.master > 0:
                parent_aoo_id = '%s-%d' % (chr(ord(aootype) + 1), ref.master)

            # NOTE(review): 'id' uses the raw aootype while 'parentId' uses
            # the stripped one - confirm aootype never carries padding.
            aoo.setdefault(row.regno, []).append({
                'id': '%s-%d' % (ref.aootype, ref.aookey),
                'parentId': parent_aoo_id,
                'name': stripped_or_none(ref.aooname, 'title'),
                'alternativeName': stripped_or_none(ref.aoosort, 'title'),
                'locationType': scale[aootype],
                'isWelsh': boolean_on_value(ref.welsh, 'Y'),
            })

        requests = [
            UpdateOne({'ids.GB-CHC': regno},
                      {'$set': {'areasOfOperation': areas}})
            for regno, areas in aoo.items()
        ]

        logging.info(lower_limit)

        # Advance before the early ``continue`` so an empty window cannot
        # stall the loop.
        lower_limit += batch_size

        if not requests:
            continue

        try:
            charity_collection.bulk_write(requests)
        except BulkWriteError as bwe:
            # Failures are logged and the remaining windows still run.
            logging.error(bwe.details)