def test_init_args():
    temp = ItemSortedDict(key_func, enumerate(alphabet))
    assert len(temp) == 26
    assert temp[0] == 'a'
    assert temp[25] == 'z'
    assert temp.iloc[4] == 4
    temp._check()
def test_init_args():
    temp = ItemSortedDict(key_func, enumerate(alphabet))
    assert len(temp) == 26
    assert temp[0] == 'a'
    assert temp[25] == 'z'
    assert temp.keys()[4] == 4
    temp._check()
def test_setitem():
    temp = ItemSortedDict(value_func, enumerate(reversed(alphabet)))
    temp[25] = '!'
    del temp[25]
    iloc = temp.keys()
    assert iloc[0] == 24
    temp[25] = 'a'
    assert iloc[0] == 25
def decreasing_dict():
    """This initialization orders the dictionary from large to small values"""

    def sorting_rule(key, value):
        return -value

    return ItemSortedDict(sorting_rule, SortedDict())
Exemple #5
0
class topK_max:
    '''
    Given a stream of (key,value,ridealong) tuples, remember the k largest values.
    If a key is added repeatedly, use the largest value.
    '''
    def __init__(self, size):
        self.size = size
        self.element = namedtuple('topK_max_element', ['value', 'ridealong'])
        self.d = ItemSortedDict(getvaluevalue)

    def add(self, key, value, ridealong):
        if key in self.d:
            if value >= self.d[key].value:
                self.d[key] = self.element(value, ridealong)
        elif len(self.d) < self.size:
            self.d[key] = self.element(value, ridealong)
        elif value > self.d.peekitem()[1].value:
            self.d.popitem()
            self.d[key] = self.element(value, ridealong)

    def readout(self):
        return [(k, list(v)) for k, v in self.d.iteritems()]
Exemple #6
0
class topK_sum:
    '''
    Space-saving heavy hitters.

    Given a stream of (key, value) tuples, rememgber the k items
    with the largest sum of values.

    http://www.cse.ust.hk/~raywong/comp5331/References/EfficientComputationOfFrequentAndTop-kElementsInDataStreams.pdf
    '''
    def __init__(self, size):
        self.size = size
        self.element = namedtuple('topK_max_element',
                                  ['value', 'ridealong', 'fake'])
        self.d = ItemSortedDict(getvaluevalue)

    def add(self, key, value, ridealong):
        if key in self.d:
            self.d[key] = self.element(self.d[key].value + value, ridealong,
                                       self.d[key].fake)
        elif len(self.d) < self.size:
            self.d[key] = self.element(value, ridealong, 0)
        elif value >= self.d.peekitem()[1].value:
            self.d.popitem()
            self.d[key] = self.element(value, ridealong, 0)
        else:
            evicted = self.d.popitem()
            oldvalue = evicted[1].value
            newvalue = max(value, oldvalue + 1)
            fake = max(newvalue - value, 0)
            self.d[key] = self.element(newvalue, ridealong, fake)

    def readout(self):
        ret = []
        for i in self.d.iteritems():
            if i[1].value > 2 * i[1].fake:
                ret.append((i[0], [i[1].value, i[1].ridealong]))
        return ret
def test_init():
    temp = ItemSortedDict(key_func)
    temp._check()
def test_copy():
    temp = ItemSortedDict(value_func, enumerate(reversed(alphabet)))
    that = temp.copy()
    assert temp == that
    assert temp._key != that._key
def test_init():
    temp = ItemSortedDict(key_func)
    temp._check()
def test_delitem_error():
    temp = ItemSortedDict(value_func, enumerate(reversed(alphabet)))
    del temp[-1]
def test_delitem():
    temp = ItemSortedDict(value_func, enumerate(reversed(alphabet)))
    del temp[25]
    assert temp.keys()[0] == 24
def test_getitem():
    temp = ItemSortedDict(value_func, enumerate(reversed(alphabet)))
    assert temp[0] == 'z'
    assert temp.keys()[0] == 25
    assert list(temp) == list(reversed(range(26)))
def test_init_kwargs():
    temp = ItemSortedDict(key_func, a=0, b=1, c=2)
    assert len(temp) == 3
    assert temp['a'] == 0
    assert temp.keys()[0] == 'a'
    temp._check()
def test_init_kwargs():
    temp = ItemSortedDict(key_func, a=0, b=1, c=2)
    assert len(temp) == 3
    assert temp['a'] == 0
    assert temp.iloc[0] == 'a'
    temp._check()
def test_update():
    temp = ItemSortedDict(lambda key, value: value)
    for index, letter in enumerate(alphabet):
        pair = {index: letter}
        temp.update(pair)
def test_delitem_error():
    temp = ItemSortedDict(value_func, enumerate(reversed(alphabet)))
    with pytest.raises(KeyError):
        del temp[-1]
def test_copy():
    temp = ItemSortedDict(value_func, enumerate(reversed(alphabet)))
    that = temp.copy()
    assert temp == that
    assert temp._key != that._key
Exemple #18
0
 def __init__(self, size):
     self.size = size
     self.element = namedtuple('topK_max_element',
                               ['value', 'ridealong', 'fake'])
     self.d = ItemSortedDict(getvaluevalue)
Exemple #19
0
def update(interest, membershipfile):
    """update member, membership tables, from membershipfile if supplied, or from service based on interest"""
    thislogger = getLogger('members.cli')
    if debug:
        thislogger.setLevel(DEBUG)
    else:
        thislogger.setLevel(INFO)
    thislogger.propagate = True

    # set local interest
    g.interest = interest
    linterest = localinterest()

    # assume update will complete ok
    tableupdatetime = TableUpdateTime.query.filter_by(
        interest=linterest, tablename='member').one_or_none()
    if not tableupdatetime:
        tableupdatetime = TableUpdateTime(interest=linterest,
                                          tablename='member')
        db.session.add(tableupdatetime)
    tableupdatetime.lastchecked = datetime.today()

    # normal case is download from RunSignUp service
    if not membershipfile:
        # get, check club id
        club_id = linterest.service_id
        if not (linterest.club_service == 'runsignup' and club_id):
            raise ParameterError(
                'interest Club Service must be runsignup, and Service ID must be defined'
            )

        # transform: membership "file" format from RunSignUp API
        xform = Transform(
            {
                'MemberID':
                lambda mem: mem['user']['user_id'],
                'MembershipID':
                'membership_id',
                'MembershipType':
                'club_membership_level_name',
                'FamilyName':
                lambda mem: mem['user']['last_name'],
                'GivenName':
                lambda mem: mem['user']['first_name'],
                'MiddleName':
                lambda mem: mem['user']['middle_name']
                if mem['user']['middle_name'] else '',
                'Gender':
                lambda mem: 'Female'
                if mem['user']['gender'] == 'F' else 'Male',
                'DOB':
                lambda mem: mem['user']['dob'],
                'City':
                lambda mem: mem['user']['address']['city'],
                'State':
                lambda mem: mem['user']['address']['state'],
                'Email':
                lambda mem: mem['user']['email']
                if 'email' in mem['user'] else '',
                'PrimaryMember':
                'primary_member',
                'JoinDate':
                'membership_start',
                'ExpirationDate':
                'membership_end',
                'LastModified':
                'last_modified',
            },
            sourceattr=False,  # source and target are dicts
            targetattr=False)
        rsu = RunSignUp(key=current_app.config['RSU_KEY'],
                        secret=current_app.config['RSU_SECRET'],
                        debug=debug)

        def doxform(ms):
            membership = {}
            xform.transform(ms, membership)
            return membership

        with rsu:
            # get current and future members from RunSignUp, and put into common format
            rawmemberships = rsu.members(club_id, current_members_only='F')
            currfuturememberships = [
                m for m in rawmemberships
                if m['membership_end'] >= datetime.today().date().isoformat()
            ]
            memberships = [doxform(ms) for ms in currfuturememberships]

    # membershipfile supplied
    else:
        with open(membershipfile, 'r') as _MF:
            MF = DictReader(_MF)
            # memberships already in common format
            memberships = [ms for ms in MF]

    # sort memberships by member (family_name, given_name, gender, dob), expiration_date
    memberships.sort(key=lambda m: (m['FamilyName'], m['GivenName'], m[
        'Gender'], m['DOB'], m['ExpirationDate']))

    # set up member, membership transforms to create db records
    # transform: member record from membership "file" format
    memxform = Transform(
        {
            'family_name':
            'FamilyName',
            'given_name':
            'GivenName',
            'middle_name':
            'MiddleName',
            'gender':
            'Gender',
            'svc_member_id':
            'MemberID',
            'dob':
            lambda m: isodate.asc2dt(m['DOB']).date(),
            'hometown':
            lambda m: f'{m["City"]}, {m["State"]}'
            if 'City' in m and 'State' in m else '',
            'email':
            'Email',
            'start_date':
            lambda m: isodate.asc2dt(m['JoinDate']).date(),
            'end_date':
            lambda m: isodate.asc2dt(m['ExpirationDate']).date(),
        },
        sourceattr=False,
        targetattr=True)
    # transform: update member record from membership record
    memupdate = Transform(
        {
            'svc_member_id': 'svc_member_id',
            'hometown': 'hometown',
            'email': 'email',
        },
        sourceattr=True,
        targetattr=True)
    # transform: membership record from membership "file" format
    mshipxform = Transform(
        {
            'svc_member_id':
            'MemberID',
            'svc_membership_id':
            'MembershipID',
            'membershiptype':
            'MembershipType',
            'hometown':
            lambda m: f'{m["City"]}, {m["State"]}'
            if 'City' in m and 'State' in m else '',
            'email':
            'Email',
            'start_date':
            lambda m: isodate.asc2dt(m['JoinDate']).date(),
            'end_date':
            lambda m: isodate.asc2dt(m['ExpirationDate']).date(),
            'primary':
            lambda m: m['PrimaryMember'].lower() == 't' or m['PrimaryMember'].
            lower() == 'yes',
            'last_modified':
            lambda m: rsudt.asc2dt(m['LastModified']),
        },
        sourceattr=False,
        targetattr=True)

    # insert member, membership records
    for m in memberships:
        # need MembershipId to be string for comparison with database key
        m['MembershipID'] = str(m['MembershipID'])

        filternamedob = and_(Member.family_name == m['FamilyName'],
                             Member.given_name == m['GivenName'],
                             Member.gender == m['Gender'],
                             Member.dob == isodate.asc2dt(m['DOB']))
        # func.binary forces case sensitive comparison. see https://stackoverflow.com/a/31788828/799921
        filtermemberid = Member.svc_member_id == func.binary(m['MemberID'])
        filtermember = or_(filternamedob, filtermemberid)

        # get all the member records for this member
        # note there may currently be more than one member record, as the memberships may be discontiguous
        thesemembers = SortedList(key=lambda member: member.end_date)
        thesemembers.update(Member.query.filter(filtermember).all())

        # if member doesn't exist, create member and membership records
        if len(thesemembers) == 0:
            thismember = Member(interest=localinterest())
            memxform.transform(m, thismember)
            db.session.add(thismember)
            # flush so thismember can be referenced in thismship, and can be found in later processing
            db.session.flush()
            thesemembers.add(thismember)

            thismship = Membership(interest=localinterest(), member=thismember)
            mshipxform.transform(m, thismship)
            db.session.add(thismship)
            # flush so thismship can be found in later processing
            db.session.flush()

        # if there are already some memberships for this member, merge with this membership (m)
        else:
            # dbmships is keyed by svc_membership_id, sorted by end_date
            # NOTE: membership id is unique only within a member -- be careful if the use of dbmships changes
            # to include multiple members
            dbmships = ItemSortedDict(lambda k, v: v.end_date)
            for thismember in thesemembers:
                for mship in thismember.memberships:
                    dbmships[mship.svc_membership_id] = mship

            # add membership if not already there for this member
            mshipid = m['MembershipID']
            if mshipid not in dbmships:
                newmship = True
                thismship = Membership(interest=localinterest())
                db.session.add(thismship)
                # flush so thismship can be found in later processing
                db.session.flush()

            # update existing membership
            else:
                newmship = False
                thismship = dbmships[mshipid]

            # merge the new membership record into the database record
            mshipxform.transform(m, thismship)

            # add new membership to data structure
            if newmship:
                dbmships[thismship.svc_membership_id] = thismship

            # need list view for some processing
            dbmships_keys = dbmships.keys()

            # check for overlaps
            for thisndx in range(1, len(dbmships_keys)):
                prevmship = dbmships[dbmships_keys[thisndx - 1]]
                thismship = dbmships[dbmships_keys[thisndx]]
                if thismship.start_date <= prevmship.end_date:
                    oldstart = thismship.start_date
                    newstart = prevmship.end_date + timedelta(1)
                    oldstartasc = isodate.dt2asc(oldstart)
                    newstartasc = isodate.dt2asc(newstart)
                    endasc = isodate.dt2asc(thismship.end_date)
                    memberkey = f'{m["FamilyName"]},{m["GivenName"]},{m["DOB"]}'
                    thislogger.warn(
                        f'overlap detected for {memberkey}: end={endasc} was start={oldstartasc} now start={newstartasc}'
                    )
                    thismship.start_date = newstart

            # update appropriate member record(s), favoring earlier member records
            # NOTE: membership hometown, email get copied into appropriate member records;
            #   since mship list is sorted, last one remains
            for mshipid in dbmships_keys:
                mship = dbmships[mshipid]
                for nextmndx in range(len(thesemembers)):
                    thismember = thesemembers[nextmndx]
                    lastmember = thesemembers[nextmndx -
                                              1] if nextmndx != 0 else None

                    # TODO: use Transform for these next four entries
                    # corner case: someone changed their birthdate
                    thismember.dob = isodate.asc2dt(m['DOB']).date()

                    # prefer last name found
                    thismember.given_name = m['GivenName']
                    thismember.family_name = m['FamilyName']
                    thismember.middle_name = m['MiddleName'] if m[
                        'MiddleName'] else ''

                    # mship causes new member record before this one
                    #   or after end of thesemembers
                    #   or wholy between thesemembers
                    if (mship.end_date + timedelta(1) < thismember.start_date
                            or (nextmndx == len(thesemembers) - 1)
                            and mship.start_date >
                            thismember.end_date + timedelta(1)
                            or lastmember and mship.start_date >
                            lastmember.end_date + timedelta(1)
                            and mship.end_date < thismember.start_date):
                        newmember = Member(interest=localinterest())
                        # flush so thismember can be referenced in mship, and can be found in later processing
                        db.session.flush()
                        memxform.transform(m, newmember)
                        mship.member = newmember
                        break

                    # mship extends this member record from the beginning
                    if mship.end_date + timedelta(1) == thismember.start_date:
                        thismember.start_date = mship.start_date
                        mship.member = thismember
                        memupdate.transform(mship, thismember)
                        break

                    # mship extends this member from the end
                    if mship.start_date == thismember.end_date + timedelta(1):
                        thismember.end_date = mship.end_date
                        mship.member = thismember
                        memupdate.transform(mship, thismember)
                        break

                    # mship end date was changed
                    if (mship.start_date >= thismember.start_date
                            and mship.start_date <= thismember.end_date
                            and mship.end_date != thismember.end_date):
                        thismember.end_date = mship.end_date
                        mship.member = thismember
                        memupdate.transform(mship, thismember)
                        break

                    # mship start date was changed
                    if (mship.end_date >= thismember.start_date
                            and mship.end_date <= thismember.end_date
                            and mship.start_date != thismember.start_date):
                        thismember.start_date = mship.start_date
                        mship.member = thismember
                        memupdate.transform(mship, thismember)
                        break

                    # mship wholly contained within this member
                    if mship.start_date >= thismember.start_date and mship.end_date <= thismember.end_date:
                        mship.member = thismember
                        memupdate.transform(mship, thismember)
                        break

            # delete unused member records
            delmembers = []
            for mndx in range(len(thesemembers)):
                thismember = thesemembers[mndx]
                if len(thismember.memberships) == 0:
                    delmembers.append(thismember)
            for delmember in delmembers:
                db.session.delete(delmember)
                thesemembers.remove(delmember)
            if len(delmembers) > 0:
                db.session.flush()

            # merge member records as appropriate
            thisndx = 0
            delmembers = []
            for nextmndx in range(1, len(thesemembers)):
                thismember = thesemembers[thisndx]
                nextmember = thesemembers[nextmndx]
                if thismember.end_date + timedelta(1) == nextmember.start_date:
                    for mship in nextmember.memberships:
                        mship.member = thismember
                        delmembers.append(nextmember)
                else:
                    thisndx = nextmndx
            for delmember in delmembers:
                db.session.delete(delmember)
            if len(delmembers) > 0:
                db.session.flush()

    # save statistics file
    groupfolder = join(current_app.config['APP_FILE_FOLDER'], interest)
    if not exists(groupfolder):
        mkdir(groupfolder, mode=0o770)
    statspath = join(groupfolder, current_app.config['APP_STATS_FILENAME'])
    analyzemembership(statsfile=statspath)

    # make sure we remember everything we did
    db.session.commit()