Exemple #1
0
def parse_activity(new_identifiers, old_xml, resource):
    """Store each activity parsed from ``resource`` once per identifier.

    Every activity yielded by ``parse.document`` is attached to ``resource``.
    An activity whose ``iati_identifier`` is not yet in ``new_identifiers``
    is recorded there, given a ``last_change_datetime``, added to the session
    and handed to ``check_for_duplicates``.  An identifier that was already
    seen only produces a warning through ``parse.log``.  The session is
    flushed after every activity and committed once after the loop.

    :param new_identifiers: set of identifiers seen so far; mutated in place.
    :param old_xml: mapping of identifier to a pair whose first item is used
        as the previous ``last_change_datetime`` and whose second item is
        compared with ``hash(activity.raw_xml)``.
    :param resource: object providing ``document``, ``dataset_id`` and
        ``url``.
    """
    for activity in parse.document(resource.document, resource):
        activity.resource = resource
        identifier = activity.iati_identifier

        if identifier in new_identifiers:
            parse.log.warn(
                _("Duplicate identifier {0} in same resource document".format(
                    identifier),
                  logger='activity_importer',
                  dataset=resource.dataset_id,
                  resource=resource.url),
                exc_info='')
        else:
            new_identifiers.add(identifier)
            try:
                unchanged = hash(activity.raw_xml) == old_xml[identifier][1]
            except KeyError:
                # No previous entry for this identifier: treat as changed.
                unchanged = False
            if unchanged:
                # Same XML hash as before, so keep the earlier timestamp.
                activity.last_change_datetime = old_xml[identifier][0]
            else:
                activity.last_change_datetime = datetime.datetime.now()
            db.session.add(activity)
            check_for_duplicates([activity])

        db.session.flush()
    db.session.commit()
def parse_activity(new_identifiers, old_xml, resource):
    """Import the activities of ``resource``, skipping repeated identifiers.

    Each parsed activity is linked to ``resource``.  The first activity seen
    for an identifier is added to ``new_identifiers``, timestamped, added to
    the session and passed to ``check_for_duplicates``; later activities
    with the same identifier are only reported via ``parse.log.warn``.  The
    session is flushed once per activity and committed after the loop.

    :param new_identifiers: set of identifiers seen so far; mutated in place.
    :param old_xml: mapping of identifier to a pair; item ``[0]`` is reused
        as ``last_change_datetime`` when item ``[1]`` equals the hash of the
        activity's ``raw_xml``.
    :param resource: object providing ``document``, ``dataset_id`` and
        ``url``.
    """
    for activity in parse.document(resource.document, resource):
        activity.resource = resource
        ident = activity.iati_identifier
        is_repeat = ident in new_identifiers

        if not is_repeat:
            new_identifiers.add(ident)
            try:
                same_xml = hash(activity.raw_xml) == old_xml[ident][1]
            except KeyError:
                # Identifier unknown to ``old_xml``: counts as a change.
                same_xml = False
            if same_xml:
                activity.last_change_datetime = old_xml[ident][0]
            else:
                activity.last_change_datetime = datetime.datetime.now()
            db.session.add(activity)
            check_for_duplicates([activity])
        else:
            message = _(
                "Duplicate identifier {0} in same resource document".format(
                    ident),
                logger='activity_importer',
                dataset=resource.dataset_id,
                resource=resource.url)
            parse.log.warn(message, exc_info='')

        db.session.flush()
    db.session.commit()
Exemple #3
0
    def test_policy_markers(self):
        # The second activity parsed from CD.xml should carry eight policy
        # markers; the codes of the first four are checked in order.
        activities = list(parse.document(fixture_filename("CD.xml")))
        markers = activities[1].policy_markers

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(8, len(markers))
        self.assertEqual(cl.PolicyMarker.gender_equality, markers[0].code)
        self.assertEqual(cl.PolicyMarker.aid_to_environment, markers[1].code)
        self.assertEqual(
            cl.PolicyMarker.participatory_developmentgood_governance,
            markers[2].code)
        self.assertEqual(cl.PolicyMarker.trade_development, markers[3].code)
Exemple #4
0
    def test_policy_markers(self):
        # Checks the policy markers of the second activity in CD.xml: there
        # must be eight, and the first four must have the expected codes.
        second = list(parse.document(fixture_filename("CD.xml")))[1]
        markers = second.policy_markers

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(8, len(markers))
        self.assertEqual(cl.PolicyMarker.gender_equality, markers[0].code)
        self.assertEqual(cl.PolicyMarker.aid_to_environment, markers[1].code)
        self.assertEqual(
            cl.PolicyMarker.participatory_developmentgood_governance,
            markers[2].code)
        self.assertEqual(cl.PolicyMarker.trade_development, markers[3].code)
Exemple #5
0
def parse_resource(resource):
    """Replace the stored activities of ``resource`` with freshly parsed ones.

    In order, the function:

    1. remembers the identifiers, change timestamps and raw-XML hashes of
       the activities currently stored for ``resource.url``;
    2. deletes those activities;
    3. parses ``resource.document`` and adds every activity, keeping the old
       ``last_change_datetime`` when the raw-XML hash is unchanged;
    4. stores a ``DeletedActivity`` row for each identifier that is no
       longer present, and removes ``DeletedActivity`` rows for identifiers
       that are present;
    5. sets ``resource.version`` and ``resource.last_parsed``.

    :param resource: object providing ``url``, ``document`` and
        ``activities``; it is added to the session.
    :returns: the same ``resource`` object.
    """
    db.session.add(resource)
    current = Activity.query.filter_by(resource_url=resource.url)
    current_identifiers = {row.iati_identifier for row in current.all()}

    # identifier -> (last_change_datetime, hash of raw_xml) for the rows that
    # are about to be deleted, so unchanged activities keep their timestamp.
    old_xml = {
        identifier: (changed, hash(raw_xml))
        for identifier, changed, raw_xml in db.session.query(
            Activity.iati_identifier, Activity.last_change_datetime,
            Activity.raw_xml).filter_by(resource_url=resource.url)
    }

    db.session.query(Activity).filter_by(resource_url=resource.url).delete()
    new_identifiers = set()
    activities = []
    for activity in parse.document(resource.document, resource):
        activity.resource = resource
        new_identifiers.add(activity.iati_identifier)
        try:
            if hash(activity.raw_xml) == old_xml[activity.iati_identifier][1]:
                activity.last_change_datetime = old_xml[
                    activity.iati_identifier][0]
            else:
                # NOTE(review): local time here, UTC for ``now`` below --
                # confirm the mix is intended.
                activity.last_change_datetime = datetime.datetime.now()
        except KeyError:
            # Identifier was not stored before.
            activity.last_change_datetime = datetime.datetime.now()
        activities.append(activity)
        db.session.add(activity)
        # Commit in batches once more than 50 objects are pending.
        if len(db.session.new) > 50:
            check_for_duplicates(activities)
            db.session.commit()
            activities = []
    db.session.add_all(activities)
    check_for_duplicates(activities)
    db.session.commit()

    resource.version = parse.document_metadata(resource.document)

    # add any identifiers that are no longer present to deleted_activity table
    diff = current_identifiers - new_identifiers
    now = datetime.datetime.utcnow()
    deleted = [
        DeletedActivity(iati_identifier=deleted_activity, deletion_date=now)
        for deleted_activity in diff
    ]
    if deleted:
        db.session.add_all(deleted)

    # remove any new identifiers from the deleted_activity table
    if new_identifiers:
        db.session.query(DeletedActivity)\
                .filter(DeletedActivity.iati_identifier.in_(new_identifiers))\
                .delete(synchronize_session="fetch")

    log.info("Parsed %d activities from %s", len(resource.activities),
             resource.url)
    resource.last_parsed = now
    return resource  #, new_identifiers
def parse_resource(resource):
    """Re-parse ``resource`` and replace its stored activities.

    The resource is added to the session, the ``Activity`` rows stored for
    ``resource.url`` are deleted, and ``resource.activities`` is set to the
    activities parsed from ``resource.document``.  ``resource.last_parsed``
    is set to the current UTC time.

    :param resource: object providing ``url`` and ``document``.
    :returns: the same ``resource`` object.
    """
    db.session.add(resource)
    stale_activities = Activity.query.filter_by(resource_url=resource.url)
    stale_activities.delete()
    parsed = parse.document(resource.document, resource)
    resource.activities = list(parsed)
    activity_count = len(resource.activities)
    log.info("Parsed %d activities from %s", activity_count, resource.url)
    resource.last_parsed = datetime.datetime.utcnow()
    return resource
 def test_dates(self):
     # The first activity in CD.xml should expose its planned and actual
     # start/end dates as datetime.date values.
     first = list(parse.document(fixture_filename("CD.xml")))[0]
     expected_start = datetime.date(2004, 1, 1)
     expected_end = datetime.date(2010, 12, 31)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected_start, first.start_planned)
     self.assertEqual(expected_start, first.start_actual)
     self.assertEqual(expected_end, first.end_planned)
     self.assertEqual(expected_end, first.end_actual)
Exemple #8
0
 def test_missing_id(self):
     # missing activity id means don't parse: the single activity below has
     # no <iati-identifier>, so the parser is expected to yield nothing.
     document = ET.XML(
         u'''
           <iati-activities>
             <iati-activity default-currency="GBP" xml:lang="en">
                 <reporting-org ref="GB-2" type="15">CDC Group plc</reporting-org>
                 <activity-status code="2">Implementation</activity-status>
             </iati-activity>
           </iati-activities>
             ''')
     activities = parse.document(document)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(0, len(list(activities)))
Exemple #9
0
 def test_missing_id(self):
     # missing activity id means don't parse: no <iati-identifier> element
     # is present, so zero activities are expected.
     document = ET.XML(
         u'''
           <iati-activities>
             <iati-activity default-currency="GBP" xml:lang="en">
                 <reporting-org ref="GB-2" type="15">CDC Group plc</reporting-org>
                 <activity-status code="2">Implementation</activity-status>
             </iati-activity>
           </iati-activities>
             ''')
     parsed = list(parse.document(document))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(0, len(parsed))
Exemple #10
0
 def test_missing_reporting_org(self):
     # missing reporting org should still parse: the activity below has an
     # identifier but no <reporting-org>, and must come back as one activity.
     document = ET.XML(
         u'''
           <iati-activities>
             <iati-activity default-currency="GBP" xml:lang="en">
                 <iati-identifier>AAA-AA</iati-identifier>
                 <activity-status code="2">Implementation</activity-status>
             </iati-activity>
           </iati-activities>
             ''')
     activities = list(parse.document(document))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(1, len(activities))
     self.assertEqual(u"AAA-AA", activities[0].iati_identifier)
 def test_missing_reporting_org(self):
     # missing reporting org should still parse: exactly one activity with
     # identifier AAA-AA is expected.
     document = ET.XML(
         u'''
           <iati-activities>
             <iati-activity default-currency="GBP" xml:lang="en">
                 <iati-identifier>AAA-AA</iati-identifier>
                 <activity-status code="2">Implementation</activity-status>
             </iati-activity>
           </iati-activities>
             ''')
     parsed = list(parse.document(document))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(1, len(parsed))
     self.assertEqual(u"AAA-AA", parsed[0].iati_identifier)
Exemple #12
0
def parse_file(filenames, verbose=False, fail_xml=False, fail_spec=False):
    """Parse each file and store its activities, committing once per file.

    A ``parse.ParserError`` raised while handling a file is logged and the
    session is rolled back; processing then continues with the next file
    unless the error is re-raised as described below.

    :param filenames: iterable of file names handed to ``parse.document``.
    :param verbose: when true, print each file name before parsing it.
    :param fail_xml: when true, re-raise a ``parse.XMLError``.
    :param fail_spec: when true, re-raise a ``parse.SpecError``.
    :raises parse.XMLError: only when ``fail_xml`` is set.
    :raises parse.SpecError: only when ``fail_spec`` is set.
    """
    for filename in filenames:
        if verbose:
            # One pre-formatted argument prints the same text whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print("Parsing %s" % (filename,))
        try:
            db.session.add_all(parse.document(filename))
            db.session.commit()
        except parse.ParserError as exc:
            logging.error("Could not parse file %r", filename)
            db.session.rollback()
            if isinstance(exc, parse.XMLError) and fail_xml:
                raise
            if isinstance(exc, parse.SpecError) and fail_spec:
                raise
Exemple #13
0
def parse_file(filenames, verbose=False, fail_xml=False, fail_spec=False):
    """Import every file in ``filenames``, one commit per file.

    The activities yielded by ``parse.document`` for a file are added to the
    session and committed.  If a ``parse.ParserError`` occurs, it is logged,
    the session is rolled back and the loop moves on to the next file,
    unless one of the ``fail_*`` flags asks for the error to propagate.

    :param filenames: iterable of file names handed to ``parse.document``.
    :param verbose: when true, print each file name before parsing it.
    :param fail_xml: when true, re-raise a ``parse.XMLError``.
    :param fail_spec: when true, re-raise a ``parse.SpecError``.
    :raises parse.XMLError: only when ``fail_xml`` is set.
    :raises parse.SpecError: only when ``fail_spec`` is set.
    """
    for filename in filenames:
        if verbose:
            # Formatting first keeps the output identical under both the
            # Python 2 print statement and the Python 3 print function.
            print("Parsing %s" % (filename,))
        try:
            db.session.add_all(parse.document(filename))
            db.session.commit()
        except parse.ParserError as exc:
            logging.error("Could not parse file %r", filename)
            db.session.rollback()
            if isinstance(exc, parse.XMLError) and fail_xml:
                raise
            if isinstance(exc, parse.SpecError) and fail_spec:
                raise
Exemple #14
0
 def test_save_complex_example(self):
     # No assertions: the test passes when the activities parsed from the
     # DFID example can be added to the session and committed.
     source = fixture("complex_example_dfid.xml", encoding=None)
     db.session.add_all(parse.document(source))
     db.session.commit()
Exemple #15
0
 def test_default_language(self):
     # The first activity in default_currency.xml should have English as
     # its default language.
     first = list(parse.document(fixture_filename("default_currency.xml")))[0]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.Language.english, first.default_language)
Exemple #16
0
 def test_default_tied_status(self):
     # The second activity in CD.xml should have "untied" as its default
     # tied status.
     second = list(parse.document(fixture_filename("CD.xml")))[1]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.TiedStatus.untied, second.default_tied_status)
Exemple #17
0
 def test_default_flow_type(self):
     # The second activity in CD.xml should have ODA as its default flow
     # type.
     second = list(parse.document(fixture_filename("CD.xml")))[1]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.FlowType.oda, second.default_flow_type)
Exemple #18
0
 def test_collaboration_type(self):
     # The second activity in CD.xml should have a bilateral collaboration
     # type.
     second = list(parse.document(fixture_filename("CD.xml")))[1]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.CollaborationType.bilateral,
                      second.collaboration_type)
Exemple #19
0
 def test_related_activity(self):
     # The first activity in CD.xml should list four related activities,
     # the first of which references GB-1-105838-101.
     first = list(parse.document(fixture_filename("CD.xml")))[0]
     related = first.related_activities
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(4, len(related))
     self.assertEqual("GB-1-105838-101", related[0].ref)
Exemple #20
0
 def test_sector_percentage_count(self):
     # The first activity of the DFID example should have five sector
     # percentages.
     source = fixture("complex_example_dfid.xml", encoding=None)
     first = next(parse.document(source))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(5, len(first.sector_percentages))
Exemple #21
0
 def test_default_language(self):
     # Expects English as the default language of the first activity parsed
     # from default_currency.xml.
     parsed = list(parse.document(fixture_filename("default_currency.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.Language.english, parsed[0].default_language)
Exemple #22
0
 def test_default_hierarchy(self):
     # The first activity in default_currency.xml should have a hierarchy
     # equal to RelatedActivityType.parent.
     first = list(parse.document(fixture_filename("default_currency.xml")))[0]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.RelatedActivityType.parent, first.hierarchy)
Exemple #23
0
 def test_default_tied_status(self):
     # Expects "untied" as the default tied status of the second activity
     # parsed from CD.xml.
     parsed = list(parse.document(fixture_filename("CD.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.TiedStatus.untied, parsed[1].default_tied_status)
Exemple #24
0
 def test_default_aid_type(self):
     # The second activity in CD.xml should have project-type interventions
     # as its default aid type.
     second = list(parse.document(fixture_filename("CD.xml")))[1]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.AidType.projecttype_interventions,
                      second.default_aid_type)
Exemple #25
0
 def test_default_flow_type(self):
     # Expects ODA as the default flow type of the second activity parsed
     # from CD.xml.
     parsed = list(parse.document(fixture_filename("CD.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.FlowType.oda, parsed[1].default_flow_type)
Exemple #26
0
 def test_default_finance_type(self):
     # The second activity in CD.xml should have "aid grant excluding debt
     # reorganisation" as its default finance type.
     second = list(parse.document(fixture_filename("CD.xml")))[1]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         cl.FinanceType.aid_grant_excluding_debt_reorganisation,
         second.default_finance_type)
Exemple #27
0
 def test_different_roles(self):
     # No assertions: the test passes when the activities parsed from
     # same_orgs_different_roles.xml can be added and committed.
     fixture_path = fixture_filename("same_orgs_different_roles.xml")
     db.session.add_all(parse.document(fixture_path))
     db.session.commit()
Exemple #28
0
 def test_no_start_actual(self):
     # Activity GB-CHC-272465-680 in missing_dates.xml is expected to have
     # no actual start date.
     parsed = parse.document(fixture_filename("missing_dates.xml"))
     by_identifier = {item.iati_identifier: item for item in parsed}
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         None, by_identifier[u"GB-CHC-272465-680"].start_actual)
Exemple #29
0
 def test_save_complex_example(self):
     # Passes when the activities parsed from the DFID example can be added
     # to the session and committed; there are no explicit assertions.
     dfid_example = fixture("complex_example_dfid.xml", encoding=None)
     parsed = parse.document(dfid_example)
     db.session.add_all(parsed)
     db.session.commit()
Exemple #30
0
 def test_collaboration_type(self):
     # Expects a bilateral collaboration type on the second activity parsed
     # from CD.xml.
     parsed = list(parse.document(fixture_filename("CD.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.CollaborationType.bilateral,
                      parsed[1].collaboration_type)
Exemple #31
0
 def test_save_repeated_participation(self):
     # No assertions: the test passes when the activities parsed from
     # repeated_participation.xml can be added and committed.
     fixture_path = fixture_filename("repeated_participation.xml")
     db.session.add_all(parse.document(fixture_path))
     db.session.commit()
Exemple #32
0
 def test_activity_status(self):
     # The first activity in default_currency.xml should have the
     # "implementation" activity status.
     first = list(parse.document(fixture_filename("default_currency.xml")))[0]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.ActivityStatus.implementation,
                      first.activity_status)
Exemple #33
0
 def test_different_roles(self):
     # Passes when the activities parsed from same_orgs_different_roles.xml
     # can be added to the session and committed; no explicit assertions.
     parsed = parse.document(
         fixture_filename("same_orgs_different_roles.xml"))
     db.session.add_all(parsed)
     db.session.commit()
Exemple #34
0
 def test_default_finance_type(self):
     # Expects "aid grant excluding debt reorganisation" as the default
     # finance type of the second activity parsed from CD.xml.
     parsed = list(parse.document(fixture_filename("CD.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         cl.FinanceType.aid_grant_excluding_debt_reorganisation,
         parsed[1].default_finance_type)
Exemple #35
0
 def test_big_values(self):
     # No assertions: the test passes when the activities parsed from
     # big_value.xml can be added to the session and committed.
     fixture_path = fixture_filename("big_value.xml")
     db.session.add_all(parse.document(fixture_path))
     db.session.commit()
Exemple #36
0
 def test_default_aid_type(self):
     # Expects project-type interventions as the default aid type of the
     # second activity parsed from CD.xml.
     parsed = list(parse.document(fixture_filename("CD.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.AidType.projecttype_interventions,
                      parsed[1].default_aid_type)
Exemple #37
0
 def test_related_activity(self):
     # Expects four related activities on the first activity parsed from
     # CD.xml, the first one referencing GB-1-105838-101.
     parsed = list(parse.document(fixture_filename("CD.xml")))
     related = parsed[0].related_activities
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(4, len(related))
     self.assertEqual("GB-1-105838-101", related[0].ref)
Exemple #38
0
 def test_default_hierarchy(self):
     # Expects RelatedActivityType.parent as the hierarchy of the first
     # activity parsed from default_currency.xml.
     parsed = list(parse.document(fixture_filename("default_currency.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.RelatedActivityType.parent, parsed[0].hierarchy)
Exemple #39
0
 def setUp(self):
     # Parse the annotated 2.01 example once per test and keep both the
     # full list and its first activity on the test instance.
     super(TestParse2xxActivity, self).setUp()
     fixture_path = fixture_filename("2.01-example-annotated.xml")
     self.activities = [act for act in parse.document(fixture_path)]
     self.act = self.activities[0]
Exemple #40
0
 def test_save_parsed_201(self):
     # No assertions: the test passes when the activities parsed from the
     # annotated 2.01 example can be added and committed.
     fixture_path = fixture_filename("2.01-example-annotated.xml")
     db.session.add_all(parse.document(fixture_path))
     db.session.commit()
def load_fix(fix_name):
    """Parse the fixture named ``fix_name`` and commit its activities.

    :param fix_name: fixture name passed to ``fixture_filename``.
    """
    fixture_path = fixture_filename(fix_name)
    db.session.add_all(parse.document(fixture_path))
    db.session.commit()
Exemple #42
0
 def test_save_repeated_participation(self):
     # Passes when the activities parsed from repeated_participation.xml
     # can be added to the session and committed; no explicit assertions.
     parsed = parse.document(
         fixture_filename("repeated_participation.xml"))
     db.session.add_all(parsed)
     db.session.commit()
Exemple #43
0
def parse_resource(resource):
    """Replace the stored activities of ``resource`` with freshly parsed ones.

    In order, the function:

    1. remembers the identifiers, change timestamps and raw-XML hashes of
       the activities currently stored for ``resource.url``;
    2. deletes those activities;
    3. parses ``resource.document`` and adds the first activity seen for
       each identifier, keeping the old ``last_change_datetime`` when the
       raw-XML hash is unchanged; repeated identifiers are only logged;
    4. stores a ``DeletedActivity`` row for each identifier that is no
       longer present, and removes ``DeletedActivity`` rows for identifiers
       that are present;
    5. sets ``resource.version`` and ``resource.last_parsed``.

    :param resource: object providing ``url``, ``document``, ``dataset_id``
        and ``activities``; it is added to the session.
    :returns: the same ``resource`` object.
    """
    db.session.add(resource)
    current = Activity.query.filter_by(resource_url=resource.url)
    current_identifiers = {row.iati_identifier for row in current.all()}

    # identifier -> (last_change_datetime, hash of raw_xml) for the rows that
    # are about to be deleted, so unchanged activities keep their timestamp.
    old_xml = {
        identifier: (changed, hash(raw_xml))
        for identifier, changed, raw_xml in db.session.query(
            Activity.iati_identifier, Activity.last_change_datetime,
            Activity.raw_xml).filter_by(resource_url=resource.url)
    }

    db.session.query(Activity).filter_by(resource_url=resource.url).delete()
    new_identifiers = set()
    activities = []
    for activity in parse.document(resource.document, resource):
        activity.resource = resource

        if activity.iati_identifier not in new_identifiers:
            new_identifiers.add(activity.iati_identifier)
            try:
                if hash(activity.raw_xml) == old_xml[activity.iati_identifier][1]:
                    activity.last_change_datetime = old_xml[activity.iati_identifier][0]
                else:
                    # NOTE(review): local time here, UTC for ``now`` below --
                    # confirm the mix is intended.
                    activity.last_change_datetime = datetime.datetime.now()
            except KeyError:
                # Identifier was not stored before.
                activity.last_change_datetime = datetime.datetime.now()
            activities.append(activity)
            db.session.add(activity)
            # Commit in batches once more than 50 objects are pending.
            if len(db.session.new) > 50:
                check_for_duplicates(activities)
                db.session.commit()
                activities = []
        else:
            parse.log.warn(
                _("Duplicate identifier {0} in same resource document".format(
                    activity.iati_identifier),
                logger='activity_importer', dataset=resource.dataset_id, resource=resource.url),
                exc_info=''
            )
    db.session.add_all(activities)
    check_for_duplicates(activities)
    db.session.commit()

    resource.version = parse.document_metadata(resource.document)

    # add any identifiers that are no longer present to deleted_activity table
    diff = current_identifiers - new_identifiers
    now = datetime.datetime.utcnow()
    deleted = [
        DeletedActivity(iati_identifier=deleted_activity, deletion_date=now)
        for deleted_activity in diff
    ]
    if deleted:
        db.session.add_all(deleted)

    # remove any new identifiers from the deleted_activity table
    if new_identifiers:
        db.session.query(DeletedActivity)\
                .filter(DeletedActivity.iati_identifier.in_(new_identifiers))\
                .delete(synchronize_session="fetch")

    log.info(
        "Parsed %d activities from %s",
        len(resource.activities),
        resource.url)
    resource.last_parsed = now
    return resource  #, new_identifiers
Exemple #44
0
 def test_big_values(self):
     # Passes when the activities parsed from big_value.xml can be added to
     # the session and committed; there are no explicit assertions.
     parsed = parse.document(
         fixture_filename("big_value.xml"))
     db.session.add_all(parsed)
     db.session.commit()
Exemple #45
0
 def test_no_start_actual(self):
     # Expects no actual start date on activity GB-CHC-272465-680 parsed
     # from missing_dates.xml.
     fixture_path = fixture_filename("missing_dates.xml")
     lookup = {
         parsed.iati_identifier: parsed
         for parsed in parse.document(fixture_path)
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(None, lookup[u"GB-CHC-272465-680"].start_actual)
Exemple #46
0
 def test_dates(self):
     # Expects the first activity parsed from CD.xml to start on 2004-01-01
     # and end on 2010-12-31, for both the planned and the actual dates.
     parsed = list(parse.document(fixture_filename("CD.xml")))
     first = parsed[0]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(datetime.date(2004, 1, 1), first.start_planned)
     self.assertEqual(datetime.date(2004, 1, 1), first.start_actual)
     self.assertEqual(datetime.date(2010, 12, 31), first.end_planned)
     self.assertEqual(datetime.date(2010, 12, 31), first.end_actual)
Exemple #47
0
def load_fix(fix_name):
    """Add the activities parsed from a fixture to the session and commit.

    :param fix_name: fixture name passed to ``fixture_filename``.
    """
    parsed = parse.document(
        fixture_filename(fix_name))
    db.session.add_all(parsed)
    db.session.commit()
Exemple #48
0
 def setUp(self):
     # After the parent setUp, parse the annotated 2.01 example and store
     # the activity list plus its first element for the tests to use.
     super(TestParse2xxActivity, self).setUp()
     parsed = parse.document(
         fixture_filename("2.01-example-annotated.xml"))
     self.activities = list(parsed)
     self.act = self.activities[0]
Exemple #49
0
 def test_activity_status(self):
     # Expects the "implementation" activity status on the first activity
     # parsed from default_currency.xml.
     parsed = list(parse.document(fixture_filename("default_currency.xml")))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(cl.ActivityStatus.implementation,
                      parsed[0].activity_status)
Exemple #50
0
 def test_sector_percentage_count(self):
     # Expects five sector percentages on the first activity parsed from
     # the DFID example.
     dfid_example = fixture("complex_example_dfid.xml", encoding=None)
     parsed = parse.document(dfid_example)
     first = next(parsed)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(5, len(first.sector_percentages))
Exemple #51
0
 def test_save_parsed_201(self):
     # Passes when the activities parsed from the annotated 2.01 example
     # can be added to the session and committed; no explicit assertions.
     parsed = parse.document(
         fixture_filename("2.01-example-annotated.xml"))
     db.session.add_all(parsed)
     db.session.commit()