예제 #1
0
 def test_insert(self):
   """Insert an organization and check that it reads back unchanged."""
   organization = Organization(externalId='myorg', displayName='myorg_display',
                               hpoId=PITT_HPO_ID, isObsolete=1)
   created_organization = self.organization_dao.insert(organization)
   new_organization = self.organization_dao.get(created_organization.organizationId)
   # Copy the generated id and the stored isObsolete value onto the local
   # object so the dict comparison below checks the remaining fields.
   organization.organizationId = created_organization.organizationId
   organization.isObsolete = new_organization.isObsolete
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(organization.asdict(), new_organization.asdict())
예제 #2
0
    def _setup_hpos(self, org_dao=None):
        """Seed the HPO, organization and site rows used as fixtures.

        Stores ``PITT_HPO_ID`` on ``self.hpo_id``, the id of the inserted
        Pittsburgh organization on ``self.organization_id`` and the id of the
        first inserted site on ``self.site_id``.

        Note: the ``org_dao`` argument is not used; a new ``OrganizationDao``
        is always created.
        """
        awardee_dao = HPODao()
        awardee_rows = (
            (UNSET_HPO_ID, 'UNSET', 'Unset', OrganizationType.UNSET),
            (PITT_HPO_ID, 'PITT', 'Pittsburgh', OrganizationType.HPO),
            (AZ_HPO_ID, 'AZ_TUCSON', 'Arizona', OrganizationType.HPO),
        )
        for awardee_id, awardee_name, awardee_label, awardee_type in awardee_rows:
            awardee_dao.insert(
                HPO(hpoId=awardee_id,
                    name=awardee_name,
                    displayName=awardee_label,
                    organizationType=awardee_type))
        self.hpo_id = PITT_HPO_ID

        org_dao = OrganizationDao()
        az_org = Organization(organizationId=AZ_ORG_ID,
                              externalId='AZ_TUCSON_BANNER_HEALTH',
                              displayName='Banner Health',
                              hpoId=AZ_HPO_ID)
        org_dao.insert(az_org)

        pitt_org = Organization(organizationId=PITT_ORG_ID,
                                externalId='PITT_BANNER_HEALTH',
                                displayName='PITT display Banner Health',
                                hpoId=PITT_HPO_ID)
        self.organization_id = org_dao.insert(pitt_org).organizationId

        site_dao = SiteDao()
        monroeville = Site(siteName='Monroeville Urgent Care Center',
                           googleGroup='hpo-site-monroeville',
                           mayolinkClientNumber=7035769,
                           organizationId=PITT_ORG_ID,
                           hpoId=PITT_HPO_ID)
        # Only the first site's id is remembered on the test case.
        self.site_id = site_dao.insert(monroeville).siteId

        remaining_sites = (
            ('Phoenix Urgent Care Center', 'hpo-site-bannerphoenix',
             PITT_ORG_ID, PITT_HPO_ID),
            ('Phoenix clinic', 'hpo-site-clinic-phoenix',
             AZ_ORG_ID, AZ_HPO_ID),
        )
        for site_name, google_group, owner_org_id, owner_hpo_id in remaining_sites:
            site_dao.insert(
                Site(siteName=site_name,
                     googleGroup=google_group,
                     mayolinkClientNumber=7035770,
                     organizationId=owner_org_id,
                     hpoId=owner_hpo_id))
    def _update_organization(self, hierarchy_org_obj):
        """Insert or update the Organization described by ``hierarchy_org_obj``.

        The organization is matched to an existing row by its upper-cased
        external id. An existing row is only written when one of its fields
        (ignoring ``organizationId``) differs from the incoming data. After
        the write, the organization is looked up by external id and its id is
        passed to ``bq_organization_update_by_id``.

        Raises:
            BadRequest: if the payload has no id, no organization-id
                identifier, or its reference does not match any HPO.
        """
        if hierarchy_org_obj.id is None:
            raise BadRequest('No id found in payload data.')

        identifier_system = HIERARCHY_CONTENT_SYSTEM_PREFIX + 'organization-id'
        organization_id = self._get_value_from_identifier(
            hierarchy_org_obj, identifier_system)
        if organization_id is None:
            raise BadRequest(
                'No organization-identifier info found in payload data.')

        # Inactive organizations are flagged obsolete; active ones carry no flag.
        if hierarchy_org_obj.active:
            is_obsolete = None
        else:
            is_obsolete = ObsoleteStatus('OBSOLETE')
        resource_id = self._get_reference(hierarchy_org_obj)

        hpo = self.hpo_dao.get_by_resource_id(resource_id)
        if hpo is None:
            raise BadRequest(
                'Invalid partOf reference {} importing organization {}'.format(
                    resource_id, organization_id))

        incoming = Organization(externalId=organization_id.upper(),
                                displayName=hierarchy_org_obj.name,
                                hpoId=hpo.hpoId,
                                isObsolete=is_obsolete,
                                resourceId=hierarchy_org_obj.id)
        orgs_by_external_id = {
            stored.externalId: stored
            for stored in self.organization_dao.get_all()
        }
        current = orgs_by_external_id.get(incoming.externalId)

        with self.organization_dao.session() as session:
            if not current:
                self.organization_dao.insert_with_session(session, incoming)
            else:
                # Compare field by field, ignoring the primary key, which the
                # incoming object does not carry.
                incoming_fields = incoming.asdict()
                incoming_fields['organizationId'] = None
                current_fields = current.asdict()
                current_fields['organizationId'] = None
                if current_fields == incoming_fields:
                    logging.info('Not updating {}.'.format(
                        incoming_fields['externalId']))
                else:
                    current.displayName = incoming.displayName
                    current.hpoId = incoming.hpoId
                    current.isObsolete = incoming.isObsolete
                    current.resourceId = incoming.resourceId
                    self.organization_dao.update_with_session(session, current)

        saved = self.organization_dao.get_by_external_id(organization_id.upper())
        bq_organization_update_by_id(saved.organizationId)
 def _create_org(self, id_):
     """Insert an Organization whose id, external id and display name are ``id_``.

     The organization is attached to ``UNSET_HPO_ID``. The locally built
     object is returned, not the value returned by the DAO insert.
     """
     fields = dict(organizationId=id_,
                   externalId=id_,
                   displayName=id_,
                   hpoId=UNSET_HPO_ID)
     new_org = Organization(**fields)
     self.org_dao.insert(new_org)
     return new_org
 def _make_org(self, hpo, int_id, external_id):
     """Insert an Organization named 'SOME ORG' under ``hpo`` and return it.

     ``int_id`` becomes the organization id and ``external_id`` the external
     id. The locally built object is returned, not the value returned by the
     DAO insert.
     """
     fields = dict(organizationId=int_id,
                   externalId=external_id,
                   displayName='SOME ORG',
                   hpoId=hpo.hpoId)
     new_org = Organization(**fields)
     self.org_dao.insert(new_org)
     return new_org
예제 #6
0
def init():
    """Connect the SQL query module, create the tables and seed test rows.

    Loads the queries under ``assets/sql``, connects them to
    ``sqlite:///memory``, stores the handler on ``settings.sql`` and runs
    every ``*_create`` query. When no organization with id 1 is found, a test
    organization, two users, an admin role, a profile and a fund are added.

    Returns:
        The pugsql queries handler.
    """
    handler = pugsql.module('assets/sql')
    # NOTE(review): this URL looks like a database file named "memory" rather
    # than an in-memory database — confirm that is the intent.
    handler.connect('sqlite:///memory')
    settings.sql = handler

    # Create tables if they don't exist
    for table in ('org', 'user', 'prof', 'fund', 'stat', 'value', 'role',
                  'role_user'):
        getattr(handler, table + '_create')()

    # Add test rows if they don't exist
    if handler.org_find(id=1) is None:
        org = Organization.add('Test Company')
        admin = User.add('admin', 'test123')
        User.add('guest', 'test123')
        admin_role = Role.add('admin')
        admin.add_to_role(role_id=admin_role.id)
        profile = Profile.add('Test Profile', org.id)
        Fund.add('Test Fund', 'Test Manager', 2000, 0.00, 0.00, profile.id)

    # return the queries handler
    return handler
예제 #7
0
  def test_participant_pairing_updates_onchange(self):
    """Changing an organization's HPO updates the participants paired to it."""
    provider_link = '[{"organization": {"reference": "Organization/AZ_TUCSON"}, "primary": true}]'
    TIME = datetime.datetime(2018, 1, 1)
    TIME2 = datetime.datetime(2018, 1, 2)
    insert_org = self.organization_dao.insert(
      Organization(externalId='tardis', displayName='bluebox', hpoId=PITT_HPO_ID))

    # At TIME: create a participant, pair it with the organization and store
    # a participant summary for it.
    with FakeClock(TIME):
      self.participant_dao.insert(Participant(participantId=1, biobankId=2))
      participant = self.participant_dao.get(1)
      participant.organizationId = insert_org.organizationId
      self.participant_dao.update(participant)

      self.assertEqual(participant.hpoId, insert_org.hpoId)
      participant = self.participant_dao.get(1)
      p_summary = self.ps_dao.insert(self.participant_summary(participant))

    # At TIME2: move the organization to a different HPO.
    with FakeClock(TIME2):
      insert_org.hpoId = AZ_HPO_ID
      self.organization_dao.update(insert_org)

    new_org = self.organization_dao.get_by_external_id('tardis')
    ps = self.ps_dao.get(p_summary.participantId)
    ph = self.ps_history.get([participant.participantId, 2])
    participant = self.participant_dao.get(1)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(ps.lastModified, TIME2)
    self.assertEqual(ps.hpoId, new_org.hpoId)
    self.assertEqual(ph.hpoId, insert_org.hpoId)
    self.assertEqual(ph.organizationId, insert_org.organizationId)
    self.assertEqual(new_org.hpoId, participant.hpoId)
    self.assertEqual(new_org.organizationId, participant.organizationId)
    self.assertIsNone(participant.siteId)
    self.assertEqual(participant.providerLink, provider_link)
예제 #8
0
    def _setup_data(self):
        """Insert two organizations under PITT_HPO_ID and two sites under the first.

        The first site has every descriptive field filled in; the second only
        sets its name, Google group, organization and the two status fields.
        """
        org_dao = OrganizationDao()
        sites = SiteDao()

        first_org = org_dao.insert(
            Organization(externalId='ORG_1',
                         displayName='Organization 1',
                         hpoId=PITT_HPO_ID,
                         resourceId='o123456'))
        second_org = Organization(externalId='AARDVARK_ORG',
                                  displayName='Aardvarks Rock',
                                  hpoId=PITT_HPO_ID,
                                  resourceId='o123457')
        org_dao.insert(second_org)

        detailed_site = dict(
            siteName='Site 1',
            googleGroup='hpo-site-1',
            mayolinkClientNumber=123456,
            organizationId=first_org.organizationId,
            siteStatus=SiteStatus.ACTIVE,
            enrollingStatus=EnrollingStatus.ACTIVE,
            launchDate=datetime.datetime(2016, 1, 1),
            notes='notes',
            latitude=12.1,
            longitude=13.1,
            directions='directions',
            physicalLocationName='locationName',
            address1='address1',
            address2='address2',
            city='Austin',
            state='TX',
            zipCode='78751',
            phoneNumber='555-555-5555',
            adminEmails='[email protected], [email protected]',
            link='http://www.example.com',
        )
        sites.insert(Site(**detailed_site))

        sparse_site = dict(
            siteName='Zebras Rock',
            googleGroup='aaaaaaa',
            organizationId=first_org.organizationId,
            enrollingStatus=EnrollingStatus.INACTIVE,
            siteStatus=SiteStatus.INACTIVE,
        )
        sites.insert(Site(**sparse_site))
예제 #9
0
    def renderFund(self, fund_id):
        """Build the rendering context for the fund identified by ``fund_id``.

        Looks up the fund, its profile and its organization, then collects one
        entry per known statistic. Input statistics (beta, controlled and
        curve stats) that have no stored values get a placeholder series of
        six ones; calculated statistics with no stored values are left out.
        The shared ``x`` axis is taken from the first statistic handled
        (stored x values, or ``0..5`` for a placeholder).

        Returns:
            dict with the fund/profile/organization names, the ``x`` axis,
            the per-stat ``stats`` mapping and the four stat-name lists.
        """
        fund = Fund.find(fund_id)
        profile = Profile.find(fund.prof)
        organization = Organization.find(profile.org)

        # these stats are inputs and show up in the top part of the page
        stats_beta = ['Beta']
        stats_controlled = ['Alpha', 'RM', 'RF']
        stats_curves = ['c_rate', 'd_rate']

        # these stats are calculated and show up after calc is clicked
        stats_calculated = [
            'growth_rate', 'NAV', 'Unfunded', 'Called', 'Distributed'
        ]

        # Built once instead of on every loop iteration.
        input_stats = stats_beta + stats_controlled + stats_curves
        placeholder_points = 6

        stats = {}
        x = None
        for stat_name in input_stats + stats_calculated:
            stat = Stat.find_by_name(stat_name, fund_id, 'db')

            if stat is not None:
                # Fetch the values once and reuse them for both axes.
                values = stat.get_values()
                y_values = values[1]
                if x is None:
                    x = values[0]
            elif stat_name in input_stats:
                # The placeholder is built without a comprehension variable
                # named ``x``: on Python 2 that variable leaks out of a list
                # comprehension and overwrote the shared axis with an int.
                y_values = [1] * placeholder_points
                if x is None:
                    x = list(range(placeholder_points))
            else:
                continue

            colors = settings.colors[stat_name]
            stats[stat_name] = {
                'y': y_values,
                'color_line': colors['color_line'],
                'color_fill': colors['color_fill']
            }

        return {
            'fund_name': fund.fund_name,
            'fund': fund_id,
            'prof_name': profile.prof_name,
            'org_name': organization.org_name,
            'x': x,
            'stats': stats,
            'stats_beta': stats_beta,
            'stats_curves': stats_curves,
            'stats_controlled': stats_controlled,
            'stats_calculated': stats_calculated
        }
예제 #10
0
 def post(self):
     """Create a user from the submitted form and return a new token.

     The form is validated by ``__check_request``. An 18-character token made
     of digits and ASCII letters is generated and stored on the new user,
     which is attached to the organization with id 1.

     Returns:
         The ``jsonify`` response carrying status, message, the submitted
         form and the generated token.
     """
     # Local import: SystemRandom draws from the OS entropy source, which is
     # suitable for tokens; the module-level random functions are not.
     from random import SystemRandom

     post_data = self.__check_request(request.form)
     source = string.digits + string.ascii_letters
     rng = SystemRandom()
     token = ''.join(rng.choice(source) for _ in range(18))
     org = Organization.find(1)
     user = User(name=post_data['name'],
                 mail_address=post_data['mail_address'],
                 password=post_data['password'],
                 token=token,
                 organization_id=org.id
     )
     user.insert()
     # NOTE(review): the raw form is echoed back in the response; it appears
     # to include the password field — confirm this is intended.
     return jsonify(status=200, message='ok', request=request.form, response={'token':token})
예제 #11
0
 def _setup_unset_enrollment_site(self):
     """Insert organization 'ORG_2' and one inactive site with UNSET enrolling status."""
     sites = SiteDao()
     orgs = OrganizationDao()

     second_org = orgs.insert(
         Organization(externalId='ORG_2',
                      displayName='Organization 2',
                      hpoId=PITT_HPO_ID))

     site_fields = dict(
         siteName='not enrolling site',
         googleGroup='not_enrolling_dot_com',
         organizationId=second_org.organizationId,
         enrollingStatus=EnrollingStatus.UNSET,
         siteStatus=SiteStatus.INACTIVE,
     )
     sites.insert(Site(**site_fields))
예제 #12
0
  def test_participant_different_hpo_does_not_change(self):
    """A participant not paired to the organization keeps its HPO when the organization's HPO changes."""
    org = self.organization_dao.insert(
      Organization(externalId='stark_industries', displayName='ironman', hpoId=PITT_HPO_ID))

    # Create a participant and set its HPO to UNSET.
    self.participant_dao.insert(Participant(participantId=1, biobankId=2))
    unpaired = self.participant_dao.get(1)
    unpaired.hpoId = UNSET_HPO_ID
    self.participant_dao.update(unpaired)

    # Move the organization to a different HPO.
    org.hpoId = AZ_HPO_ID
    self.organization_dao.update(org)

    reloaded_org = self.organization_dao.get_by_external_id('stark_industries')
    reloaded_participant = self.participant_dao.get(1)

    self.assertNotEqual(reloaded_org.hpoId, reloaded_participant.hpoId)
    self.assertEqual(reloaded_org.hpoId, AZ_HPO_ID)
    self.assertEqual(reloaded_participant.hpoId, UNSET_HPO_ID)
 def _entity_from_row(self, row):
     """Build an Organization from one import row.

     The awardee id in the row is upper-cased and looked up by name through
     ``self.hpo_dao``. When no HPO matches, a warning is logged, the same
     message is appended to ``self.errors`` and ``None`` is returned.

     Returns:
         An Organization with an upper-cased external id, or ``None`` when
         the awardee id is unknown.
     """
     awardee_id = row[ORGANIZATION_AWARDEE_ID_COLUMN]
     hpo = self.hpo_dao.get_by_name(awardee_id.upper())
     if hpo is None:
         organization_id = row[ORGANIZATION_ORGANIZATION_ID_COLUMN]
         # logging.warning replaces the deprecated logging.warn alias.
         logging.warning('Invalid awardee ID %s importing organization %s',
                         awardee_id, organization_id)
         self.errors.append(
             'Invalid awardee ID {} importing organization {}'.format(
                 awardee_id, organization_id))
         return None
     return Organization(
         externalId=row[ORGANIZATION_ORGANIZATION_ID_COLUMN].upper(),
         displayName=row[ORGANIZATION_NAME_COLUMN],
         hpoId=hpo.hpoId)
예제 #14
0
 def post(self):
     """Create a user from the submitted form and return a new token.

     The form is validated by ``__check_request``. An 18-character token made
     of digits and ASCII letters is generated and stored on the new user,
     which is attached to the organization with id 1.

     Returns:
         The ``jsonify`` response carrying status, message, the submitted
         form and the generated token.
     """
     # Local import: SystemRandom draws from the OS entropy source, which is
     # suitable for tokens; the module-level random functions are not.
     from random import SystemRandom

     post_data = self.__check_request(request.form)
     source = string.digits + string.ascii_letters
     rng = SystemRandom()
     token = ''.join(rng.choice(source) for _ in range(18))
     org = Organization.find(1)
     user = User(name=post_data['name'],
                 mail_address=post_data['mail_address'],
                 password=post_data['password'],
                 token=token,
                 organization_id=org.id)
     user.insert()
     # NOTE(review): the raw form is echoed back in the response; it appears
     # to include the password field — confirm this is intended.
     return jsonify(status=200,
                    message='ok',
                    request=request.form,
                    response={'token': token})
    def get_person_organization(self, person_id=None, organization_url=None):
        """Scrape the memberships of one person and save them.

        Fetches the ``kp020.asp`` detail page for ``person_id`` and walks the
        second table below the ``rismain_raw`` element row by row. Header
        rows (``th``) select the membership type and the form field that
        carries the group id; every other row becomes a ``Membership``. The
        resulting ``Person`` is passed to ``self.db.save_person``.

        ``organization_url`` is accepted but not used.

        When an ``AttributeError`` is raised while parsing, the whole scrape
        is retried, up to three times, before giving up with an error log.

        Returns:
            None in every case.
        """
        url = ("%skp020.asp?KPLFDNR=%s&history=true"
               % (self.config['scraper']['base_url'], person_id))

        logging.info("Getting person organization from %s", url)
        # Crude retry loop because AllRis sometimes misses the opening "<" of
        # tags on the first request.
        try_counter = 0
        while True:
            try:
                response = self.get_url(url)
                # Check the response itself; the url is always truthy, so the
                # previous check on it could never trigger.
                if not response:
                    return
                tree = html.fromstring(response.text)

                memberships = []
                person = Person(originalId=person_id)
                # maps name of type to form name and membership type
                # NOTE(review): 'organisation' and 'organization' are both
                # used as mtype values below — confirm which is intended.
                type_map = {
                    u'Rat der Stadt' : {'mtype' : 'parliament',
                                        'field' : 'PALFDNR'},
                    u'Parlament' : {'mtype' : 'parliament',
                                    'field' : 'PALFDNR'},
                    u'Fraktion' : {'mtype' : 'organisation',
                                   'field' : 'FRLFDNR'},
                    'Fraktionen': {'mtype' : 'parliament', 'field' : 'FRLFDNR'},
                    u'Ausschüsse' : {'mtype' : 'organization',
                                     'field' : 'AULFDNR'},
                    'Stadtbezirk': {'mtype' : 'parliament',
                                    'field' : 'PALFDNR'},
                    'BVV': {'mtype' : 'parliament', 'field' : 'PALFDNR'},
                    'Bezirksparlament': {'mtype' : 'parliament',
                                         'field' : 'PALFDNR'},
                    'Bezirksverordnetenversammlung': {'mtype' : 'parliament',
                                                      'field' : 'PALFDNR'}
                }

                # obtain the table with the membership list via a simple state machine
                mtype = "parliament"
                field = 'PALFDNR'
                # defined up front so data rows seen before any header row
                # do not hit an unbound name
                field_list = None
                # for checking if it changes
                old_group_id = None
                # for checking if it changes
                old_group_name = None
                # might break otherwise
                group_id = None
                table = tree.xpath('//*[@id="rismain_raw"]/table[2]')
                if len(table):
                    table = table[0]
                    for line in table.findall("tr"):
                        if line[0].tag == "th":
                            # Header row: switch the state used for the data
                            # rows that follow.
                            what = line[0].text.strip()
                            field = None
                            field_list = None
                            if what in type_map:
                                mtype = type_map[what]['mtype']
                                field = type_map[what]['field']
                            elif 'Wahlperiode' in what:
                                mtype = 'parliament'
                                # 'FRLFDNR'
                                field_list = ['KPLFDNR', 'AULFDNR']
                            elif "Auskünfte gemäß BVV" in what:
                                break
                            else:
                                logging.error("Unknown organization type %s "
                                              "at person detail page %s",
                                              what, person_id)
                                continue
                        else:
                            if "Keine Information" in line.text_content():
                                # skip because no content is available
                                continue

                            # Empty line = strange stuff comes after this
                            if len(list(line)) < 2:
                                break

                            # first get the name of group
                            group_name = line[1].text_content()
                            organization = Organization(name=group_name)

                            organization.classification = mtype

                            # Now the first col might be a form with more
                            # useful information which will carry through
                            # until we find another one.
                            # With it. we still check the name though.
                            form = line[0].find("form")
                            if form is not None:
                                if field:
                                    group_id = int(form.find(
                                        "input[@name='%s']" % field).get(
                                            "value"))
                                elif field_list:
                                    # A separate loop variable keeps `field`
                                    # (the header state) intact, so later rows
                                    # still try every candidate field.
                                    for candidate in field_list:
                                        temp_form = form.find(
                                            "input[@name='%s']" % candidate)
                                        if temp_form is not None:
                                            group_id = int(temp_form.get(
                                                "value"))
                                organization.originalId = group_id
                                # remember it for next loop
                                old_group_id = group_id
                                # remember it for next loop
                                old_group_name = group_name
                            else:
                                # We did not find a form. We assume that the
                                # old group still applies but we nevertheless
                                # check if the groupname is still the same.
                                if old_group_name != group_name:
                                    logging.warning(
                                        "Group name differs but we "
                                        "didn't get a form with new "
                                        "group id: group name=%s, old "
                                        "group name=%s, old group "
                                        "id=%s at url %s",
                                        group_name, old_group_name,
                                        old_group_id, url)
                                    organization.originalId = None
                                else:
                                    organization.originalId = old_group_id
                            membership = Membership(organization=organization)
                            membership.originalId = (unicode(person_id) + '-'
                                                     + unicode(group_id))

                            # TODO: create a list of functions (third column,
                            #       line[2]) so we can index them somehow
                            raw_date = line[3].text_content()
                            # parse the date information
                            if "seit" in raw_date:
                                # NOTE(review): "seit" means "since", yet the
                                # value is stored as endDate — confirm.
                                dparts = raw_date.split()
                                membership.endDate = dparts[-1]
                            elif "Keine" not in raw_date and raw_date.strip():
                                dparts = raw_date.split()
                                membership.startDate = dparts[0]
                                membership.endDate = dparts[-1]
                            # otherwise: no date information available

                            if organization.originalId is not None:
                                memberships.append(membership)
                            else:
                                logging.warning("Bad organization at %s", url)

                    person.membership = memberships
                    self.db.save_person(person)
                    return
                else:
                    logging.info("table missing, nothing to do at %s", url)
                    return
            except AttributeError:
                # %s instead of %d: person_id defaults to None and is not
                # guaranteed to be an int.
                if try_counter < 3:
                    logging.info("Try again: Getting person organizations with "
                                 "person id %s from %s", person_id, url)
                    try_counter += 1
                else:
                    logging.error("Failed getting person organizations with "
                                  "person id %s from %s", person_id, url)
                    return
예제 #16
0
    def find_person(self):
        """Scrape the person overview and save every person found.

        Fetches the ``kp041.asp`` overview, parses it as XML with a
        recovering parser and builds one ``Person`` per child of the second
        top-level element. Non-empty fields are copied onto the person, a
        party membership is attached when a party is given, the person id is
        added to ``self.person_queue`` when that attribute exists, and the
        person is passed to ``self.db.save_person``.

        Returns:
            None. Returns early when the overview cannot be fetched.
        """
        find_person_url = (self.config['scraper']['base_url'] +
                           'kp041.asp?template=xyz&selfaction=ws&showAll=true&'
                           'PALFDNRM=1&kpdatfil=&filtdatum=filter&kpname=&'
                           'kpsonst=&kpampa=99999999&kpfr=99999999&'
                           'kpamfr=99999999&kpau=99999999&kpamau=99999999&'
                           'searchForm=true&search=Suchen')
        logging.info("Getting person overview from %s", find_person_url)
        # Parse the XML response leniently (recover=True).
        parser = etree.XMLParser(recover=True)
        r = self.get_url(find_person_url)
        if not r:
            return
        xml = r.text.encode('ascii', 'xmlcharrefreplace')
        tree = etree.fromstring(xml, parser=parser)
        h = HTMLParser.HTMLParser()

        # (XML tag, Person attribute) pairs copied when the tag has a value.
        optional_fields = (
            ('link_kp', 'originalUrl'),
            # personal information
            ('adtit', 'title'),
            ('advname', 'firstname'),
            ('adname', 'lastname'),
            # address
            ('adstr', 'address'),
            ('adhnr', 'house_number'),
            ('adplz', 'postalcode'),
            # contact
            ('adtel', 'phone'),
            ('adtel2', 'mobile'),
            ('adfax', 'fax'),
            ('ademail', 'email'),
            ('adwww1', 'website'),
        )
        # Salutation text mapped to the numeric sex code.
        sex_codes = {'Frau': 1, 'Herr': 2}

        # element 0 is the special block
        # element 1 is the list of persons
        for node in tree[1].iterchildren():
            elem = {}
            for e in node.iterchildren():
                if e.text:
                    elem[e.tag] = h.unescape(e.text)
                else:
                    elem[e.tag] = ''

            # now retrieve person details such as organization memberships etc.
            # we also get the age (but only that, no date of birth)
            person = Person(originalId=int(elem['kplfdnr']))
            for tag, attribute in optional_fields:
                if elem[tag]:
                    setattr(person, attribute, elem[tag])
            if elem['antext1'] in sex_codes:
                person.sex = sex_codes[elem['antext1']]

            person_party = elem['kppartei']
            if person_party:
                if person_party in self.config['scraper']['party_alias']:
                    person_party = self.config['scraper']['party_alias'][
                        person_party]
                new_organization = Organization(originalId=person_party,
                                                name=person_party,
                                                classification='party')
                original_id = unicode(person.originalId) + '-' + person_party
                person.membership = [
                    Membership(originalId=original_id,
                               organization=new_organization)
                ]

            # NOTE(review): values in elem are filled with '' for empty tags,
            # so this check looks always true and the else branch unreachable
            # — confirm whether `if elem['link_kp']:` was intended.
            if elem['link_kp'] is not None:
                if hasattr(self, 'person_queue'):
                    self.person_queue.add(person.originalId)
            else:
                logging.info("Person %s %s has no link", person.firstname,
                             person.lastname)
            self.db.save_person(person)
예제 #17
0
    def get_person_organization(self, person_id=None, organization_url=None):
        url = ("%skp020.asp?KPLFDNR=%s&history=true" %
               (self.config['scraper']['base_url'], person_id))

        logging.info("Getting person organization from %s", url)
        # Stupid re-try concept because AllRis sometimes misses start < at
        # tags at first request.
        try_counter = 0
        while True:
            try:
                response = self.get_url(url)
                if not url:
                    return
                tree = html.fromstring(response.text)

                memberships = []
                person = Person(originalId=person_id)
                # maps name of type to form name and membership type
                type_map = {
                    u'Rat der Stadt': {
                        'mtype': 'parliament',
                        'field': 'PALFDNR'
                    },
                    u'Parlament': {
                        'mtype': 'parliament',
                        'field': 'PALFDNR'
                    },
                    u'Fraktion': {
                        'mtype': 'organisation',
                        'field': 'FRLFDNR'
                    },
                    'Fraktionen': {
                        'mtype': 'parliament',
                        'field': 'FRLFDNR'
                    },
                    u'Ausschüsse': {
                        'mtype': 'organization',
                        'field': 'AULFDNR'
                    },
                    'Stadtbezirk': {
                        'mtype': 'parliament',
                        'field': 'PALFDNR'
                    },
                    'BVV': {
                        'mtype': 'parliament',
                        'field': 'PALFDNR'
                    },
                    'Bezirksparlament': {
                        'mtype': 'parliament',
                        'field': 'PALFDNR'
                    },
                    'Bezirksverordnetenversammlung': {
                        'mtype': 'parliament',
                        'field': 'PALFDNR'
                    }
                }

                # obtain the table with the membership list via a simple state machine
                mtype = "parliament"
                field = 'PALFDNR'
                # for checking if it changes
                old_group_id = None
                # for checking if it changes
                old_group_name = None
                # might break otherwise
                group_id = None
                table = tree.xpath('//*[@id="rismain_raw"]/table[2]')
                if len(table):
                    table = table[0]
                    for line in table.findall("tr"):
                        if line[0].tag == "th":
                            what = line[0].text.strip()
                            field = None
                            field_list = None
                            if what in type_map:
                                mtype = type_map[what]['mtype']
                                field = type_map[what]['field']
                            elif 'Wahlperiode' in what:
                                mtype = 'parliament'
                                # 'FRLFDNR'
                                field_list = ['KPLFDNR', 'AULFDNR']
                            elif "Auskünfte gemäß BVV" in what:
                                break
                            else:
                                logging.error(
                                    "Unknown organization type %s "
                                    "at person detail page %s", what,
                                    person_id)
                                continue
                        else:
                            if "Keine Information" in line.text_content():
                                # skip because no content is available
                                continue

                            # Empty line = strange stuff comes after this
                            if len(list(line)) < 2:
                                break

                            # first get the name of group
                            group_name = line[1].text_content()
                            organization = Organization(name=group_name)

                            organization.classification = mtype

                            # Now the first col might be a form with more
                            # useful information which will carry through
                            # until we find another one.
                            # With it. we still check the name though.
                            form = line[0].find("form")
                            if form is not None:
                                if field:
                                    group_id = int(
                                        form.find("input[@name='%s']" %
                                                  field).get("value"))
                                elif field_list:
                                    for field in field_list:
                                        temp_form = form.find(
                                            "input[@name='%s']" % field)
                                        if temp_form is not None:
                                            group_id = int(
                                                temp_form.get("value"))
                                organization.originalId = group_id
                                # remember it for next loop
                                old_group_id = group_id
                                # remember it for next loop
                                old_group_name = group_name
                            else:
                                # We did not find a form. We assume that the
                                # old group still applies but we nevertheless
                                # check if the groupname is still the same.
                                if old_group_name != group_name:
                                    logging.warn(
                                        "Group name differs but we "
                                        "didn't get a form with new "
                                        "group id: group name=%s, old "
                                        "group name=%s, old group "
                                        "id=%s at url %s", group_name,
                                        old_group_name, old_group_id, url)
                                    organization.originalId = None
                                else:
                                    organization.originalId = old_group_id
                            membership = Membership(organization=organization)
                            membership.originalId = (unicode(person_id) + '-' +
                                                     unicode(group_id))

                            # TODO: create a list of functions so we can
                            #       index them somehow
                            function = line[2].text_content()
                            raw_date = line[3].text_content()
                            # parse the date information
                            if "seit" in raw_date:
                                dparts = raw_date.split()
                                membership.endDate = dparts[-1]
                            elif "Keine" in raw_date or not raw_date.strip():
                                # no date information available
                                start_date = end_date = None
                            else:
                                dparts = raw_date.split()
                                membership.startDate = dparts[0]
                                membership.endDate = dparts[-1]
                            if organization.originalId is not None:
                                memberships.append(membership)
                            else:
                                logging.warn("Bad organization at %s", url)

                    person.membership = memberships
                    oid = self.db.save_person(person)
                    return
                else:
                    logging.info("table missing, nothing to do at %s", url)
                    return
            except AttributeError:
                if try_counter < 3:
                    logging.info(
                        "Try again: Getting person organizations with "
                        "person id %d from %s", person_id, url)
                    try_counter += 1
                else:
                    logging.error(
                        "Failed getting person organizations with "
                        "person id %d from %s", person_id, url)
                    return
예제 #18
0
 def get_person_organization(self, person_organization_url=None, person_id=None):
   """
   Load the organization memberships of one person and store the person.

   The person is identified either by person_id or by the URL of its
   organization page. When person_id is given it takes precedence and the
   URL is built from it; otherwise the ID is parsed out of the given URL
   and that URL is fetched as-is.

   Args:
     person_organization_url: URL of the person's organization page. Only
       used when person_id is None.
     person_id: ID of the person, inserted into the configured URL pattern.

   Returns:
     None in every case. On success the person (with any memberships that
     were found) is passed to self.db.save_person(). Nothing is saved when
     no usable identifier was supplied or the page could not be fetched.
   """
   if person_id is not None:
     person_committee_url = self.urls['PERSON_ORGANIZATION_PRINT_PATTERN'] % (
       self.config['scraper']['base_url'], person_id)
   elif person_organization_url is not None:
     # NOTE(review): the same config key is used as a %-template above and
     # as a parse pattern here -- confirm this is the intended pattern.
     parsed = parse.search(self.urls['PERSON_ORGANIZATION_PRINT_PATTERN'], person_organization_url)
     if parsed is None:
       logging.error("Could not extract person id from %s", person_organization_url)
       return
     person_id = parsed['person_id']
     # The URL we were handed is the page to fetch.
     person_committee_url = person_organization_url
   else:
     logging.error("Neither person id nor person organization url given")
     return

   logging.info("Getting person %d organizations from %s", person_id, person_committee_url)

   person = Person(originalId=person_id)

   time.sleep(self.config['scraper']['wait_time'])
   response = self.get_url(person_committee_url)
   if not response:
     return

   # seek(0) is necessary to reset response pointer.
   response.seek(0)
   html = response.read()
   html = html.replace('&nbsp;', ' ')
   parser = etree.HTMLParser()
   dom = etree.parse(StringIO(html), parser)

   trs = dom.xpath(self.xpath['PERSON_ORGANIZATION_LINES'])
   memberships = []
   for tr in trs:
     tds = tr.xpath('.//td')
     # Only rows with 2 cells (organization only) or 5 cells (organization
     # plus role and dates) are evaluated.
     if len(tds) not in (2, 5):
       continue
     long_info = len(tds) == 5

     # Reset per row so that a link which cannot be parsed never falls back
     # to the organization of the previous row.
     new_organisation = None
     if tds[0].xpath('.//a'):
       href = tds[0][0].get('href')
       href_tmp = href.split('&')
       # delete __cgrname when it's there
       if len(href_tmp) == 2 and href_tmp[1].startswith('__cgrname='):
         href = href_tmp[0]
       parsed = parse.search(self.urls['ORGANIZATION_DETAIL_PARSE_PATTERN'], href)
       if not parsed:
         parsed = parse.search(self.urls['ORGANIZATION_DETAIL_PARSE_PATTERN_FULL'], href)
       if parsed is not None:
         new_organisation = Organization(originalId=int(parsed['committee_id']))
         new_organisation.name = tds[0][0].text
     else:
       # No link in the first cell: its text is used as the original ID.
       new_organisation = Organization(originalId=tds[0].text)

     if new_organisation and long_info:
       new_membership = Membership()
       # The membership ID is person ID + organization ID, extended by the
       # start and end date when those cells have text.
       membership_original_id = unicode(person_id) + '-' + unicode(new_organisation.originalId)
       if tds[2].text:
         new_membership.role = tds[2].text
       if tds[3].text:
         new_membership.startDate = tds[3].text
         membership_original_id += '-' + tds[3].text
       if tds[4].text:
         new_membership.endDate = tds[4].text
         membership_original_id += '-' + tds[4].text
       new_membership.originalId = membership_original_id
       new_membership.organization = new_organisation
       memberships.append(new_membership)
     elif not new_organisation:
       logging.error("Bad Table Structure in %s", person_committee_url)

   if memberships:
     person.membership = memberships
   oid = self.db.save_person(person)
   logging.info("Person %d stored with _id %s", person_id, oid)
   return
 def _make_org(self, **kwargs):
   """
   Build an Organization from the given keyword arguments, insert it via
   self.org_dao, and return the object that was built (not whatever
   insert() returns).
   """
   created = Organization(**kwargs)
   self.org_dao.insert(created)
   return created
예제 #20
0
    def test_schema(self):
        """Persist a small graph of model rows through one session.

        Rows of HPO, CodeBook, Organization, Site, Code, CodeHistory, the
        participant / summary / history objects, BiobankStoredSample,
        PhysicalMeasurements, Measurement, Questionnaire,
        QuestionnaireHistory, QuestionnaireResponse, MetricsVersion and
        MetricsBucket are added to a session obtained from
        self.database.make_session() and committed group by group.

        The visible body contains no assertions; presumably the test passes
        when none of the add/commit calls raises -- confirm against the rest
        of the file.
        """
        session = self.database.make_session()

        # Base rows: an HPO and a code book, committed before any row that
        # refers to their ids is added.
        hpo = HPO(hpoId=1,
                  name='UNSET',
                  displayName='No organization set',
                  organizationType=OrganizationType.UNSET)
        code_book = CodeBook(codeBookId=1,
                             created=datetime.datetime.now(),
                             latest=True,
                             name="pmi",
                             system="http://foo/bar",
                             version="v0.1.1")
        session.add(hpo)
        session.add(code_book)
        session.commit()

        # Organization refers to the HPO above through hpoId=1.
        organization = Organization(organizationId=1,
                                    externalId='org',
                                    displayName='Organization',
                                    hpoId=1)
        session.add(organization)
        session.commit()

        # Site refers to organization 1. code1 and codeHistory1 are built
        # with the same field values.
        site = Site(siteId=1,
                    siteName='site',
                    googleGroup='*****@*****.**',
                    mayolinkClientNumber=12345,
                    organizationId=1)
        code1 = Code(codeId=1,
                     codeBookId=1,
                     system="a",
                     value="b",
                     shortValue="q",
                     display=u"c",
                     topic=u"d",
                     codeType=CodeType.MODULE,
                     mapped=True,
                     created=datetime.datetime.now())
        codeHistory1 = CodeHistory(codeId=1,
                                   codeBookId=1,
                                   system="a",
                                   value="b",
                                   shortValue="q",
                                   display=u"c",
                                   topic=u"d",
                                   codeType=CodeType.MODULE,
                                   mapped=True,
                                   created=datetime.datetime.now())
        session.add(site)
        session.add(code1)
        session.add(codeHistory1)
        session.commit()

        # Second-level code (QUESTION) whose parentId is code 1.
        code2 = Code(codeId=2,
                     codeBookId=1,
                     parentId=1,
                     system="a",
                     value="c",
                     display=u"X",
                     topic=u"d",
                     codeType=CodeType.QUESTION,
                     mapped=True,
                     created=datetime.datetime.now())
        codeHistory2 = CodeHistory(codeId=2,
                                   codeBookId=1,
                                   parentId=1,
                                   system="a",
                                   value="c",
                                   display=u"X",
                                   topic=u"d",
                                   codeType=CodeType.QUESTION,
                                   mapped=True,
                                   created=datetime.datetime.now())
        session.add(code2)
        session.add(codeHistory2)
        session.commit()

        # Third-level code (ANSWER, mapped=False) whose parentId is code 2.
        code3 = Code(codeId=3,
                     codeBookId=1,
                     parentId=2,
                     system="a",
                     value="d",
                     display=u"Y",
                     topic=u"d",
                     codeType=CodeType.ANSWER,
                     mapped=False,
                     created=datetime.datetime.now())
        codeHistory3 = CodeHistory(codeId=3,
                                   codeBookId=1,
                                   parentId=2,
                                   system="a",
                                   value="d",
                                   display=u"Y",
                                   topic=u"d",
                                   codeType=CodeType.ANSWER,
                                   mapped=False,
                                   created=datetime.datetime.now())
        session.add(code3)
        session.add(codeHistory3)
        session.commit()

        # Second consecutive commit; nothing was added since the previous one.
        session.commit()

        # Participant with an attached summary, followed by a history row.
        # All three are built through the self._*_with_defaults helpers.
        p = self._participant_with_defaults(
            participantId=1,
            version=1,
            biobankId=2,
            clientId='*****@*****.**',
            hpoId=hpo.hpoId,
            signUpTime=datetime.datetime.now(),
            lastModified=datetime.datetime.now())
        ps = self._participant_summary_with_defaults(
            participantId=1,
            biobankId=2,
            lastModified=datetime.datetime.now(),
            hpoId=hpo.hpoId,
            firstName=self.fake.first_name(),
            middleName=self.fake.first_name(),
            lastName=self.fake.last_name(),
            email=self.fake.email(),
            zipCode='78751',
            dateOfBirth=datetime.date.today(),
            genderIdentityId=1,
            consentForStudyEnrollment=QuestionnaireStatus.SUBMITTED,
            consentForStudyEnrollmentTime=datetime.datetime.now(),
            numBaselineSamplesArrived=2)
        p.participantSummary = ps
        session.add(p)
        ph = self._participant_history_with_defaults(
            participantId=1,
            biobankId=2,
            clientId='*****@*****.**',
            hpoId=hpo.hpoId,
            signUpTime=datetime.datetime.now(),
            lastModified=datetime.datetime.now())
        session.add(ph)
        session.commit()

        # Two stored samples sharing the participant's biobank id, order
        # identifier and test. They are only added here; the next commit is
        # the one after the physical measurements below.
        session.add(
            BiobankStoredSample(biobankStoredSampleId='WEB1234542',
                                biobankId=p.biobankId,
                                biobankOrderIdentifier='KIT',
                                test='1UR10',
                                confirmed=datetime.datetime.utcnow()))
        session.add(
            BiobankStoredSample(
                biobankStoredSampleId='WEB99999',  # Sample ID must be unique.
                biobankId=p.
                biobankId,  # Participant ID and test may be duplicated.
                biobankOrderIdentifier='KIT',
                test='1UR10',
                confirmed=datetime.datetime.utcnow()))

        # pm2 is final and names pm (id 1) as its amendedMeasurementsId.
        pm = PhysicalMeasurements(physicalMeasurementsId=1,
                                  participantId=1,
                                  created=datetime.datetime.now(),
                                  resource='blah',
                                  final=False,
                                  logPosition=LogPosition())
        pm2 = PhysicalMeasurements(physicalMeasurementsId=2,
                                   participantId=1,
                                   created=datetime.datetime.now(),
                                   resource='blah',
                                   final=True,
                                   amendedMeasurementsId=1,
                                   logPosition=LogPosition())
        session.add(pm)
        session.add(pm2)
        session.commit()

        # q1 is committed first; m1 and m2 below refer to it via qualifierId.
        q1 = Measurement(measurementId=3,
                         physicalMeasurementsId=pm.physicalMeasurementsId,
                         codeSystem='codeSystem',
                         codeValue='codeValue',
                         measurementTime=datetime.datetime.now(),
                         valueCodeSystem='valueCodeSystem',
                         valueCodeValue='value3')
        session.add(q1)
        session.commit()

        # m1 sets the body-site and all value* fields and is qualified by q1.
        m1 = Measurement(measurementId=1,
                         physicalMeasurementsId=pm.physicalMeasurementsId,
                         codeSystem='codeSystem',
                         codeValue='codeValue',
                         measurementTime=datetime.datetime.now(),
                         bodySiteCodeSystem='bodySiteCodeSystem',
                         bodySiteCodeValue='bodySiteCodeValue',
                         valueString='a',
                         valueDecimal=1.2,
                         valueUnit='cm',
                         valueCodeSystem='valueCodeSystem',
                         valueCodeValue='value',
                         valueDateTime=datetime.datetime.now(),
                         qualifierId=q1.measurementId)
        session.add(m1)
        session.commit()

        # m2 has m1 as parentId and is also qualified by q1.
        m2 = Measurement(measurementId=2,
                         physicalMeasurementsId=pm.physicalMeasurementsId,
                         codeSystem='codeSystem',
                         codeValue='codeValue',
                         measurementTime=datetime.datetime.now(),
                         valueCodeSystem='valueCodeSystem',
                         valueCodeValue='value2',
                         parentId=m1.measurementId,
                         qualifierId=q1.measurementId)
        session.add(m2)
        session.commit()

        # Questionnaire and its history row; the question (codeId=2) and the
        # concept (codeId=1) are appended to the history row only.
        q = Questionnaire(questionnaireId=1,
                          version=1,
                          created=datetime.datetime.now(),
                          lastModified=datetime.datetime.now(),
                          resource='what?')
        qh = QuestionnaireHistory(questionnaireId=1,
                                  version=1,
                                  created=datetime.datetime.now(),
                                  lastModified=datetime.datetime.now(),
                                  resource='what?')
        qh.questions.append(
            QuestionnaireQuestion(questionnaireQuestionId=1,
                                  questionnaireId=1,
                                  questionnaireVersion=1,
                                  linkId="1.2.3",
                                  codeId=2,
                                  repeats=True))
        qh.concepts.append(
            QuestionnaireConcept(questionnaireConceptId=1,
                                 questionnaireId=1,
                                 questionnaireVersion=1,
                                 codeId=1))
        session.add(q)
        session.add(qh)
        session.commit()

        # Response of participant 1 to questionnaire 1 with a single answer
        # to question 1 whose valueCodeId is code 3.
        qr = QuestionnaireResponse(questionnaireResponseId=1,
                                   questionnaireId=1,
                                   questionnaireVersion=1,
                                   participantId=1,
                                   created=datetime.datetime.now(),
                                   resource='blah')
        qr.answers.append(
            QuestionnaireResponseAnswer(questionnaireResponseAnswerId=1,
                                        questionnaireResponseId=1,
                                        questionId=1,
                                        endTime=datetime.datetime.now(),
                                        valueSystem='a',
                                        valueCodeId=3,
                                        valueDecimal=123,
                                        valueString=self.fake.first_name(),
                                        valueDate=datetime.date.today()))

        session.add(qr)
        session.commit()

        # Metrics version, committed before the bucket that refers to it.
        mv = MetricsVersion(metricsVersionId=1,
                            inProgress=False,
                            complete=True,
                            date=datetime.datetime.utcnow(),
                            dataVersion=1)
        session.add(mv)
        session.commit()

        # Note that hpoId is the string 'PITT' here, not an integer id.
        mb = MetricsBucket(metricsVersionId=1,
                           date=datetime.date.today(),
                           hpoId='PITT',
                           metrics='blah')
        session.add(mb)
        session.commit()