Esempio n. 1
0
    def validate_user(self, u):
        """Nested cross-validation of user ``u`` against every other user.

        Every key of ``self.data`` other than ``u`` is treated as an
        impostor. Impostor samples are split once, by position, and those
        splits are reused unchanged for every partition of ``u``'s data:

        * outer validation: the first ``self.p[x]`` samples
        * outer training:   everything after those
        * inner validation: the ``self.k[x]`` samples following the first
          ``self.p[x]``
        * inner training:   everything after those

        For each outer partition of ``u``'s data, the model is updated and
        scored on every inner partition, ``self.auth.compute_threshold()``
        is called, the model is updated on the outer training data, and the
        evaluation on the outer validation data is yielded.

        Parameters:
            u: key into ``self.data``, ``self.p`` and ``self.k``. It is
               concatenated into a log message, so it must be a string.

        Yields:
            ``self.auth.evaluate(outer_val, user_ls=[u])``, once per outer
            partition produced by ``partition_data``.
        """
        timestamp('START validating user ' + u)

        # Fixed, position-based splits of the impostors' data; only u's own
        # entries in these dicts change during the loops below.
        impostors = [x for x in self.data.keys() if x != u]
        outer_val = {}
        outer_train = {}
        inner_val = {}
        inner_train = {}
        for x in impostors:
            samples = self.data[x]
            outer_end = self.p[x]
            inner_end = outer_end + self.k[x]

            outer_val[x] = samples[:outer_end]
            outer_train[x] = samples[outer_end:]

            inner_val[x] = samples[outer_end:inner_end]
            inner_train[x] = samples[inner_end:]
        timestamp('constructed impostor sample partitions')

        # Single-argument print(...) produces the same text under Python 2
        # (print statement) and Python 3 (print function).
        print('outer loop iterations: %d'
              % int(misc.comb(len(self.data[u]), self.p[u])))
        print('inner loop iterations: %d'
              % int(misc.comb(len(self.data[u]) - self.p[u], self.k[u])))

        for n_outer, part in enumerate(partition_data(u, self.data[u], self.p[u])):
            # part is indexed as (key, training samples, validation samples);
            # the key is presumably u itself -- confirm against partition_data.
            outer_val[part[0]] = part[2]
            outer_train[part[0]] = part[1]
            for n_inner, inner_part in enumerate(partition_data(u, part[1], self.k[u])):
                inner_train[part[0]] = inner_part[1]
                inner_val[part[0]] = inner_part[2]

                self.auth.update_model(inner_train, [u])
                if n_inner == 0 and n_outer == 0:
                    # Very first scoring pass: keep the ll_dict it leaves
                    # behind so later passes can start from it.
                    self.auth.score(inner_val, [u])
                    inner_ll = self.auth.ll_dict
                else:
                    # Restore the saved ll_dict and score without clearing
                    # the cache (presumably to avoid recomputing impostor
                    # likelihoods -- confirm against the auth implementation).
                    self.auth.ll_dict = inner_ll
                    self.auth.score(inner_val, [u], clear_cache=False)
                if n_inner % 5 == 0:
                    timestamp('finished inner loop iteration ' + str(n_inner))
            self.auth.compute_threshold()
            timestamp('finished threshold computation')
            self.auth.update_model(outer_train, [u])
            timestamp('finished final model estimation')

            # Variant that also evaluates on outer_train:
            #yield self.auth.evaluate(outer_train, user_ls=[u]), self.auth.evaluate(outer_val, user_ls=[u])
            yield self.auth.evaluate(outer_val, user_ls=[u])
            timestamp('finished outer loop iteration ' + str(n_outer))
            # Reset the scores before the next outer partition.
            self.auth.scores = {}
Esempio n. 2
0
 def validate(self):
     '''
     Nested cross-validation over all users of self.data at once.

     self.data is described by the original note as
     (username -> [(n-graph -> [latencies])]).

     For every combination of per-user outer partitions (partition_data
     called with self.p) and, inside it, every combination of per-user
     inner partitions of the training part (partition_data called with
     self.k), the model is estimated and scored. Afterwards the model is
     estimated on the outer split, the threshold is computed and a pair
     (evaluation on train, evaluation on val) is yielded.
     '''
     outer_splits = [
         partition_data(name, self.data[name], self.p[name])
         for name in self.data.keys()
     ]
     for outer_combo in itertools.product(*outer_splits):
         # Each entry is indexed as (user, training part, validation part).
         train = dict((entry[0], entry[1]) for entry in outer_combo)
         val = dict((entry[0], entry[2]) for entry in outer_combo)

         inner_splits = [
             partition_data(name, train[name], self.k[name])
             for name in train.keys()
         ]
         for inner_combo in itertools.product(*inner_splits):
             inner_train = dict((entry[0], entry[1]) for entry in inner_combo)
             inner_val = dict((entry[0], entry[2]) for entry in inner_combo)
             self.auth.estimate_model(inner_train, inner_val)
             self.auth.score(inner_val)

         self.auth.estimate_model(train, val)
         self.auth.compute_threshold()
         train_result = self.auth.evaluate(train)
         val_result = self.auth.evaluate(val)
         yield train_result, val_result
         # Reset the scores before the next outer combination.
         self.auth.scores = {}
    # Fragment of a larger routine (its header and the rest of its body are
    # not visible here). Python 2 syntax: print is used as a statement.

    # One comma-separated line per entry of pkd: key, x[0], x[1], and the
    # integer-truncated ratios x[2]/x[0] and x[3]/x[1].
    for k,x in pkd.items(): 
        print k, ',', x[0], ',', x[1], ',', int(x[2]/x[0]), ',', int(x[3]/x[1])
        
    # Product of the same two ratios for the two hard-coded users.
    print ((pkd['1227981'][2]/pkd['1227981'][0])*
           (pkd['1227981'][3]/pkd['1227981'][1])*
           (pkd['9999999'][2]/pkd['9999999'][0])*
           (pkd['9999999'][3]/pkd['9999999'][1])
    )
    # Drop every entry of f except the two hard-coded users. Deleting while
    # looping is safe only because Python 2's keys() returns a list copy.
    for u in f.keys():
        if not u in ['1227981', '9999999']:
            del f[u]
    data = f
    # p and k are taken from the first two fields of each user's pkd entry.
    p = {u:int(pkd[u][0]) for u in ['1227981', '9999999']}
    k = {u:int(pkd[u][1]) for u in ['1227981', '9999999']}
    # Build the product of both users' partitions just to count it ...
    i1 = itertools.product(
        *[partition_data(u, data[u], p[u])
          for u in ['1227981', '9999999']]
    )
    lli1 = len(list(i1))
    # ... and build it again, because list() above exhausted the iterator.
    i1 = itertools.product(
        *[partition_data(u, data[u], p[u])
          for u in ['1227981', '9999999']]
    )

    for i in i1:
        # Map element [0] of each per-user partition to its element [1].
        train = {x[0]:x[1] for x in list(i)}

        # Product of partitions of each user's train data, using k; how i2
        # is consumed lies beyond the visible part of the file.
        i2 = itertools.product(
            *[partition_data(u, train[u], k[u])
              for u in ['1227981', '9999999']]
        )