Esempio n. 1
0
def parse(fh):
    """Yield one dict per ``accreditedPerson`` element found in *fh*.

    *fh* is handed straight to ``etree.parse``.  Every yielded dict holds
    the text of the person's child elements (``None`` for a missing child,
    as returned by ``findtext``); the two accreditation dates are passed
    through ``dateconv``.
    """
    doc = etree.parse(fh)
    # Use the explicit relative form './/': a leading '/' in a tree-level
    # findall() is the deprecated spelling that ElementTree implementations
    # rewrite to './/' anyway (the stdlib emits a FutureWarning for it).
    for ap_el in doc.findall('.//' + NS + 'accreditedPerson'):
        yield {
            'org_identification_code':
                ap_el.findtext(NS + 'orgIdentificationCode'),
            'number_of_ir': ap_el.findtext(NS + 'numberOfIR'),
            'org_name': ap_el.findtext(NS + 'orgName'),
            'title': ap_el.findtext(NS + 'title'),
            'first_name': ap_el.findtext(NS + 'firstName'),
            'last_name': ap_el.findtext(NS + 'lastName'),
            'start_date':
                dateconv(ap_el.findtext(NS + 'accreditationStartDate')),
            'end_date':
                dateconv(ap_el.findtext(NS + 'accreditationEndDate')),
        }
def _parse_person(rep_el, tag):
    """Collect title/name/position of the person stored under child *tag*."""
    prefix = NS + tag + '/' + NS
    return {
        'title': rep_el.findtext(prefix + 'title'),
        'first_name': rep_el.findtext(prefix + 'firstName'),
        'last_name': rep_el.findtext(prefix + 'lastName'),
        'position': rep_el.findtext(prefix + 'position'),
    }


def _parse_customized_sources(fi, container_tag):
    """List the name/amount pairs under the *container_tag* descendant."""
    container = fi.find('.//' + NS + container_tag)
    if container is None:
        return []
    return [
        {
            'name': src_el.findtext(NS + 'name'),
            'amount': intconv(src_el.findtext(NS + 'amount')),
        }
        for src_el in container.findall(NS + 'customizedSource')
    ]


def _scale_percentage(percent, base):
    """Turn a percentage text into an absolute amount relative to *base*.

    A missing/empty *percent* is returned unchanged.  When *base* is None
    there is nothing to scale against, so None is returned instead of
    letting ``float(None)`` raise TypeError.
    """
    if not percent:
        return percent
    if base is None:
        return None
    return float(percent) / 100.0 * float(base)


def _parse_turnover_breakdown(fi, fd):
    """Build the per-customer turnover list from ``turnoverBreakdown``.

    Absolute ranges are copied as they are; percentage ranges are converted
    to amounts using the turnover figures already stored in *fd*.
    """
    breakdown = []
    tb = fi.find(NS + 'turnoverBreakdown')
    if tb is None:
        return breakdown

    for range_ in tb.findall(NS + 'customersGroupsInAbsoluteRange'):
        max_ = range_.findtext('.//' + NS + 'max')
        min_ = range_.findtext('.//' + NS + 'min')
        for customer in range_.findall('.//' + NS + 'customer'):
            breakdown.append({
                'name': customer.findtext(NS + 'name'),
                'min': intconv(min_),
                'max': intconv(max_),
            })

    # Percentages refer to the overall turnover: prefer the absolute figure,
    # otherwise the matching end of the turnover range, otherwise the other
    # end.  Either base stays None when no turnover figure is available.
    base_for_max = (fd['turnover_absolute'] or fd['turnover_max']
                    or fd['turnover_min'])
    base_for_min = (fd['turnover_absolute'] or fd['turnover_min']
                    or fd['turnover_max'])
    for range_ in tb.findall(NS + 'customersGroupsInPercentageRange'):
        max_ = _scale_percentage(range_.findtext('.//' + NS + 'max'),
                                 base_for_max)
        min_ = _scale_percentage(range_.findtext('.//' + NS + 'min'),
                                 base_for_min)
        for customer in range_.findall('.//' + NS + 'customer'):
            # NOTE(review): intconv receives None for absent values all
            # over this parser, so it is assumed to accept None here too.
            breakdown.append({
                'name': customer.findtext(NS + 'name'),
                'min': intconv(min_),
                'max': intconv(max_),
            })
    return breakdown


def _parse_financial_data(fd_el):
    """Convert a ``financialData`` element into the flat ``fd`` dict."""
    fd = {}
    fd['start_date'] = dateconv(fd_el.findtext(NS + 'startDate'))
    fd['end_date'] = dateconv(fd_el.findtext(NS + 'endDate'))
    fd['eur_sources_procurement'] = intconv(
        fd_el.findtext(NS + 'eurSourcesProcurement'))
    fd['eur_sources_grants'] = intconv(
        fd_el.findtext(NS + 'eurSourcesGrants'))

    # NOTE(review): financialInformation is assumed to be present; find()
    # returns None otherwise and the attribute access below would fail.
    fi = fd_el.find(NS + 'financialInformation')
    fd['type'] = fi.get(SI + 'type')

    def amount(path):
        # Integer value of the first matching descendant of fi.
        return intconv(fi.findtext('.//' + NS + path))

    # NOTE(review): 'total_budget' is the only snake_case tag looked up in
    # this parser; verify the spelling against the schema.
    fd['total_budget'] = amount('total_budget')
    fd['public_financing_total'] = amount('totalPublicFinancing')
    fd['public_financing_national'] = amount('nationalSources')
    fd['public_financing_infranational'] = amount('infranationalSources')
    fd['public_customized'] = _parse_customized_sources(
        fi, 'customisedPublicSources')
    fd['other_sources_total'] = amount('totalOtherSources')
    fd['other_sources_donation'] = amount('donation')
    fd['other_sources_contributions'] = amount('contributions')
    fd['other_customized'] = _parse_customized_sources(
        fi, 'customisedOther')

    fd['direct_rep_costs_min'] = amount(
        'directRepresentationCosts//' + NS + 'min')
    fd['direct_rep_costs_max'] = amount(
        'directRepresentationCosts//' + NS + 'max')
    fd['cost_min'] = amount('cost//' + NS + 'min')
    fd['cost_max'] = amount('cost//' + NS + 'max')
    fd['cost_absolute'] = amount('cost//' + NS + 'absoluteAmount')
    fd['turnover_min'] = amount('turnover//' + NS + 'min')
    fd['turnover_max'] = amount('turnover//' + NS + 'max')
    fd['turnover_absolute'] = amount('turnover//' + NS + 'absoluteAmount')
    fd['turnover_breakdown'] = _parse_turnover_breakdown(fi, fd)
    return fd


def parse_rep(rep_el):
    """Convert one representative XML element into a nested dict.

    The result holds the registration metadata, the legal and head persons
    (``legal_person`` / ``head_person``), the contact details, free-text
    fields, the action-field/interest/country/organisation lists and the
    financial data under ``fd``.  Text lookups yield ``None`` for missing
    elements; dates go through ``dateconv`` and counts/amounts through
    ``intconv``.

    NOTE(review): a ``financialData`` child is assumed to exist; without it
    ``find`` returns None and the financial parsing raises AttributeError.
    """
    text = rep_el.findtext
    rep = {
        'identification_code': text(NS + 'identificationCode'),
        'status': text(NS + 'status'),
        'registration_date': dateconv(text(NS + 'registrationDate')),
        'last_update_date': dateconv(text(NS + 'lastUpdateDate')),
        'legal_status': text(NS + 'legalStatus'),
        'acronym': text(NS + 'acronym'),
        'original_name': text('.//' + NS + 'originalName'),
    }
    # The URL lives in an attribute of webSiteURL, not in its text.
    website = rep_el.find(NS + 'webSiteURL')
    rep['web_site_url'] = (website.get(NS2 + 'href')
                           if website is not None else None)
    rep['main_category'] = text('.//' + NS + 'mainCategory')
    rep['sub_category'] = text('.//' + NS + 'subCategory')

    rep['legal_person'] = _parse_person(rep_el, 'legal')
    rep['head_person'] = _parse_person(rep_el, 'head')

    # Phone/fax parts are searched at any depth below contactDetails, the
    # remaining fields only among its direct children.
    direct = NS + 'contactDetails/' + NS
    nested = NS + 'contactDetails//' + NS
    for key, path in (
            ('contact_street', direct + 'street'),
            ('contact_number', direct + 'number'),
            ('contact_post_code', direct + 'postCode'),
            ('contact_town', direct + 'town'),
            ('contact_country', direct + 'country'),
            ('contact_indic_phone', nested + 'indicPhone'),
            ('contact_indic_fax', nested + 'indicFax'),
            ('contact_fax', nested + 'fax'),
            ('contact_phone', nested + 'phoneNumber'),
            ('contact_more', direct + 'moreContactDetails'),
    ):
        rep[key] = text(path)

    rep['goals'] = text(NS + 'goals')
    rep['networking'] = text(NS + 'networking')
    rep['activities'] = text(NS + 'activities')
    rep['code_of_conduct'] = text(NS + 'codeOfConduct')
    rep['members'] = intconv(text(NS + 'members'))

    rep['action_fields'] = [
        field.text for field in rep_el.findall(
            './/' + NS + 'actionField/' + NS + 'actionField')
    ]
    rep['interests'] = [
        interest.text for interest in rep_el.findall(
            './/' + NS + 'interest/' + NS + 'name')
    ]

    rep['number_of_natural_persons'] = intconv(
        text('.//' + NS + 'structure/' + NS + 'numberOfNaturalPersons'))
    rep['number_of_organisations'] = intconv(
        text('.//' + NS + 'structure/' + NS + 'numberOfOrganisations'))

    countries = rep_el.find(NS + 'structure/' + NS + 'countries')
    rep['country_of_members'] = (
        [country.text for country in countries.findall('.//' + NS + 'country')]
        if countries is not None else [])

    organisations = rep_el.find(NS + 'structure/' + NS + 'organisations')
    rep['organisations'] = []
    if organisations is not None:
        for org_el in organisations.findall(NS + 'organisation'):
            rep['organisations'].append({
                'name': org_el.findtext(NS + 'name'),
                # Kept as raw text (not run through intconv).
                'number_of_members': org_el.findtext(NS + 'numberOfMembers'),
            })

    rep['fd'] = _parse_financial_data(rep_el.find(NS + 'financialData'))
    return rep
Esempio n. 3
0
def _parse_person(rep_el, tag):
    """Collect title/name/position of the person stored under child *tag*."""
    prefix = NS + tag + '/' + NS
    return {
        'title': rep_el.findtext(prefix + 'title'),
        'first_name': rep_el.findtext(prefix + 'firstName'),
        'last_name': rep_el.findtext(prefix + 'lastName'),
        'position': rep_el.findtext(prefix + 'position'),
    }


def _parse_customized_sources(fi, container_tag):
    """List the name/amount pairs under the *container_tag* descendant."""
    container = fi.find('.//' + NS + container_tag)
    if container is None:
        return []
    return [
        {
            'name': src_el.findtext(NS + 'name'),
            'amount': intconv(src_el.findtext(NS + 'amount')),
        }
        for src_el in container.findall(NS + 'customizedSource')
    ]


def _scale_percentage(percent, base):
    """Turn a percentage text into an absolute amount relative to *base*.

    A missing/empty *percent* is returned unchanged.  When *base* is None
    there is nothing to scale against, so None is returned instead of
    letting ``float(None)`` raise TypeError.
    """
    if not percent:
        return percent
    if base is None:
        return None
    return float(percent) / 100.0 * float(base)


def _parse_turnover_breakdown(fi, fd):
    """Build the per-customer turnover list from ``turnoverBreakdown``.

    Absolute ranges are copied as they are; percentage ranges are converted
    to amounts using the turnover figures already stored in *fd*.
    """
    breakdown = []
    tb = fi.find(NS + 'turnoverBreakdown')
    if tb is None:
        return breakdown

    for range_ in tb.findall(NS + 'customersGroupsInAbsoluteRange'):
        max_ = range_.findtext('.//' + NS + 'max')
        min_ = range_.findtext('.//' + NS + 'min')
        for customer in range_.findall('.//' + NS + 'customer'):
            breakdown.append({
                'name': customer.findtext(NS + 'name'),
                'min': intconv(min_),
                'max': intconv(max_),
            })

    # Percentages refer to the overall turnover: prefer the absolute figure,
    # otherwise the matching end of the turnover range, otherwise the other
    # end.  Either base stays None when no turnover figure is available.
    base_for_max = (fd['turnover_absolute'] or fd['turnover_max']
                    or fd['turnover_min'])
    base_for_min = (fd['turnover_absolute'] or fd['turnover_min']
                    or fd['turnover_max'])
    for range_ in tb.findall(NS + 'customersGroupsInPercentageRange'):
        max_ = _scale_percentage(range_.findtext('.//' + NS + 'max'),
                                 base_for_max)
        min_ = _scale_percentage(range_.findtext('.//' + NS + 'min'),
                                 base_for_min)
        for customer in range_.findall('.//' + NS + 'customer'):
            # NOTE(review): intconv receives None for absent values all
            # over this parser, so it is assumed to accept None here too.
            breakdown.append({
                'name': customer.findtext(NS + 'name'),
                'min': intconv(min_),
                'max': intconv(max_),
            })
    return breakdown


def _parse_financial_data(fd_el):
    """Convert a ``financialData`` element into the flat ``fd`` dict."""
    fd = {}
    fd['start_date'] = dateconv(fd_el.findtext(NS + 'startDate'))
    fd['end_date'] = dateconv(fd_el.findtext(NS + 'endDate'))
    fd['eur_sources_procurement'] = intconv(
        fd_el.findtext(NS + 'eurSourcesProcurement'))
    fd['eur_sources_grants'] = intconv(
        fd_el.findtext(NS + 'eurSourcesGrants'))

    # NOTE(review): financialInformation is assumed to be present; find()
    # returns None otherwise and the attribute access below would fail.
    fi = fd_el.find(NS + 'financialInformation')
    fd['type'] = fi.get(SI + 'type')

    def amount(path):
        # Integer value of the first matching descendant of fi.
        return intconv(fi.findtext('.//' + NS + path))

    # NOTE(review): 'total_budget' is the only snake_case tag looked up in
    # this parser; verify the spelling against the schema.
    fd['total_budget'] = amount('total_budget')
    fd['public_financing_total'] = amount('totalPublicFinancing')
    fd['public_financing_national'] = amount('nationalSources')
    fd['public_financing_infranational'] = amount('infranationalSources')
    fd['public_customized'] = _parse_customized_sources(
        fi, 'customisedPublicSources')
    fd['other_sources_total'] = amount('totalOtherSources')
    fd['other_sources_donation'] = amount('donation')
    fd['other_sources_contributions'] = amount('contributions')
    fd['other_customized'] = _parse_customized_sources(
        fi, 'customisedOther')

    fd['direct_rep_costs_min'] = amount(
        'directRepresentationCosts//' + NS + 'min')
    fd['direct_rep_costs_max'] = amount(
        'directRepresentationCosts//' + NS + 'max')
    fd['cost_min'] = amount('cost//' + NS + 'min')
    fd['cost_max'] = amount('cost//' + NS + 'max')
    fd['cost_absolute'] = amount('cost//' + NS + 'absoluteAmount')
    fd['turnover_min'] = amount('turnover//' + NS + 'min')
    fd['turnover_max'] = amount('turnover//' + NS + 'max')
    fd['turnover_absolute'] = amount('turnover//' + NS + 'absoluteAmount')
    fd['turnover_breakdown'] = _parse_turnover_breakdown(fi, fd)
    return fd


def parse_rep(rep_el):
    """Convert one representative XML element into a nested dict.

    The result holds the registration metadata, the legal and head persons
    (``legal_person`` / ``head_person``), the contact details, free-text
    fields, the action-field/interest/country/organisation lists and the
    financial data under ``fd``.  Text lookups yield ``None`` for missing
    elements; dates go through ``dateconv`` and counts/amounts through
    ``intconv``.

    NOTE(review): a ``financialData`` child is assumed to exist; without it
    ``find`` returns None and the financial parsing raises AttributeError.
    """
    text = rep_el.findtext
    rep = {
        'identification_code': text(NS + 'identificationCode'),
        'status': text(NS + 'status'),
        'registration_date': dateconv(text(NS + 'registrationDate')),
        'last_update_date': dateconv(text(NS + 'lastUpdateDate')),
        'legal_status': text(NS + 'legalStatus'),
        'acronym': text(NS + 'acronym'),
        'original_name': text('.//' + NS + 'originalName'),
    }
    # The URL lives in an attribute of webSiteURL, not in its text.
    website = rep_el.find(NS + 'webSiteURL')
    rep['web_site_url'] = (website.get(NS2 + 'href')
                           if website is not None else None)
    rep['main_category'] = text('.//' + NS + 'mainCategory')
    rep['sub_category'] = text('.//' + NS + 'subCategory')

    rep['legal_person'] = _parse_person(rep_el, 'legal')
    rep['head_person'] = _parse_person(rep_el, 'head')

    # Phone/fax parts are searched at any depth below contactDetails, the
    # remaining fields only among its direct children.
    direct = NS + 'contactDetails/' + NS
    nested = NS + 'contactDetails//' + NS
    for key, path in (
            ('contact_street', direct + 'street'),
            ('contact_number', direct + 'number'),
            ('contact_post_code', direct + 'postCode'),
            ('contact_town', direct + 'town'),
            ('contact_country', direct + 'country'),
            ('contact_indic_phone', nested + 'indicPhone'),
            ('contact_indic_fax', nested + 'indicFax'),
            ('contact_fax', nested + 'fax'),
            ('contact_phone', nested + 'phoneNumber'),
            ('contact_more', direct + 'moreContactDetails'),
    ):
        rep[key] = text(path)

    rep['goals'] = text(NS + 'goals')
    rep['networking'] = text(NS + 'networking')
    rep['activities'] = text(NS + 'activities')
    rep['code_of_conduct'] = text(NS + 'codeOfConduct')
    rep['members'] = intconv(text(NS + 'members'))

    rep['action_fields'] = [
        field.text for field in rep_el.findall(
            './/' + NS + 'actionField/' + NS + 'actionField')
    ]
    rep['interests'] = [
        interest.text for interest in rep_el.findall(
            './/' + NS + 'interest/' + NS + 'name')
    ]

    rep['number_of_natural_persons'] = intconv(
        text('.//' + NS + 'structure/' + NS + 'numberOfNaturalPersons'))
    rep['number_of_organisations'] = intconv(
        text('.//' + NS + 'structure/' + NS + 'numberOfOrganisations'))

    countries = rep_el.find(NS + 'structure/' + NS + 'countries')
    rep['country_of_members'] = (
        [country.text for country in countries.findall('.//' + NS + 'country')]
        if countries is not None else [])

    organisations = rep_el.find(NS + 'structure/' + NS + 'organisations')
    rep['organisations'] = []
    if organisations is not None:
        for org_el in organisations.findall(NS + 'organisation'):
            rep['organisations'].append({
                'name': org_el.findtext(NS + 'name'),
                # Kept as raw text (not run through intconv).
                'number_of_members': org_el.findtext(NS + 'numberOfMembers'),
            })

    rep['fd'] = _parse_financial_data(rep_el.find(NS + 'financialData'))
    return rep