class UserTests(unittest2.TestCase):
    """Checks for the ``User`` model.

    Covers identifier generation, demographic profiles, and the tag /
    description / full profiles computed against a small sample index.

    NOTE(review): the fixture user is created once in ``setUpClass`` and is
    shared by every test; ``test_maximal_pkg_profile`` adds entries to its
    ``item_score``, so tests that run afterwards see the enlarged user.
    """

    # Expected tag sets for each named demographic profile.
    _DESKTOP = set(["x11", "accessibility", "game", "junior", "office",
                    "interface::x11"])
    _ADMIN = set(["admin", "hardware", "mail", "protocol", "network",
                  "security", "web", "interface::web"])
    _DEVEL = set(["devel", "role::devel-lib", "role::shared-lib"])
    _ART = set(["field::arts", "sound"])
    _SCIENCE = set(["science", "biology", "field::astronomy",
                    "field::aviation", "field::biology", "field::chemistry",
                    "field::eletronics", "field::finance",
                    "field::geography", "field::geology",
                    "field::linguistics", "field::mathematics",
                    "field::medicine", "field::meteorology",
                    "field::physics", "field::statistics"])

    @classmethod
    def setUpClass(cls):
        # Build the sample index and the shared fixture user once per class.
        config = Config()
        cls.axi = xapian.Database(config.axi)
        sample_packages = ["gimp", "aaphoto", "eog", "emacs", "dia", "ferret",
                           "festival", "file", "inkscape", "xpdf"]
        sample_path = "test_data/.sample_axi"
        cls.sample_axi = SampleAptXapianIndex(sample_packages, cls.axi,
                                              sample_path)
        cls.user = User({"gimp": 1, "aaphoto": 1, "eog": 1, "emacs": 1})

    def _assert_profile(self, selected, expected):
        # Select the given profile names on the shared user, then compare
        # the resulting demographic tag set with the expected one.
        self.user.set_demographic_profile(set(selected))
        self.assertEqual(self.user.demographic_profile, expected)

    def test_hash(self):
        # A fresh user gets an id, and it differs from the fixture user's id.
        other = User(dict())
        self.assertIsNotNone(other.id)
        self.assertNotEqual(self.user.id, other.id)

    def test_profile_default(self):
        # Without an explicit selection the profile equals the desktop set.
        other = User(dict())
        self.assertEqual(other.demographic_profile, self._DESKTOP)

    def test_profile_desktop(self):
        self._assert_profile(["desktop"], self._DESKTOP)

    def test_profile_admin(self):
        self._assert_profile(["admin"], self._ADMIN)

    def test_profile_devel(self):
        self._assert_profile(["devel"], self._DEVEL)

    def test_profile_art(self):
        self._assert_profile(["art"], self._ART)

    def test_profile_science(self):
        self._assert_profile(["science"], self._SCIENCE)

    def test_multi_profile(self):
        # Selecting several profiles yields the union of their tag sets.
        self._assert_profile(["devel", "art"], self._DEVEL | self._ART)
        self._assert_profile(["art", "admin", "desktop"],
                             self._DESKTOP | self._ART | self._ADMIN)

    def test_items(self):
        expected = set(["gimp", "aaphoto", "eog", "emacs"])
        self.assertEqual(set(self.user.items()), expected)

    def test_profile(self):
        # profile(index, kind, size) must agree with the dedicated
        # <kind>_profile(index, size) method for every kind.
        for kind in ("tag", "desc", "full"):
            generic = self.user.profile(self.sample_axi, kind, 10)
            dedicated = getattr(self.user, kind + "_profile")(
                self.sample_axi, 10)
            self.assertEqual(generic, dedicated)

    def test_tag_profile(self):
        top_tag = self.user.tag_profile(self.sample_axi, 1)
        self.assertEqual(top_tag, ['XTuse::editing'])

    def test_desc_profile(self):
        top_term = self.user.desc_profile(self.sample_axi, 1)
        self.assertEqual(top_term, ['image'])

    def test_full_profile(self):
        # A full profile of size 10 is the size-5 tag profile followed by
        # the size-5 description profile.
        full = self.user.full_profile(self.sample_axi, 10)
        tags = self.user.tag_profile(self.sample_axi, 5)
        terms = self.user.desc_profile(self.sample_axi, 5)
        self.assertEqual(full, tags + terms)

    def test_maximal_pkg_profile(self):
        # Snapshot the items, add the listed dependency packages to the
        # user, and expect maximal_pkg_profile() to equal the snapshot.
        before = self.user.items()
        aaphoto_deps = ["libc6", "libgomp1", "libjasper1", "libjpeg62",
                        "libpng12-0"]
        libc6_deps = ["libc-bin", "libgcc1"]
        for dependency in aaphoto_deps + libc6_deps:
            self.user.item_score[dependency] = 1
        self.assertEqual(before, self.user.maximal_pkg_profile())
def _binary_row(user, coord, multiple_val):
    """Encode *user* as the 0/1 row (26 + 20 columns) of the binary dataset.

    Single-valued dimensions set one cell inside their own range
    (Gender from column 0, Age from 2, CSP from 9, Geography from 16);
    every value of a multi-valued dimension sets the cell at
    ``coord[dim][value] + 20``.
    """
    offsets = {'Gender': 0, 'Age': 2, 'CSP': 9, 'Geography': 16}
    row = [0] * (26 + 20)
    for k, v in user.items():
        if k in multiple_val:
            for e in v:
                row[coord[k][e] + 20] = 1
        elif k in offsets:
            row[offsets[k] + coord[k][v]] = 1
    return row


def _compact_row(user, coord, multiple_val, val):
    """Encode *user* as the compact row (26 + 4 columns).

    Columns 0-3 hold the category index of Gender, Age, CSP and Geography;
    every value of a multi-valued dimension stores *val* at
    ``coord[dim][value] + 4``.
    """
    slots = {'Gender': 0, 'Age': 1, 'CSP': 2, 'Geography': 3}
    row = [0] * (26 + 4)
    for k, v in user.items():
        if k in multiple_val:
            for e in v:
                row[coord[k][e] + 4] = val
        elif k in slots:
            row[slots[k]] = coord[k][v]
    return row


def xuser(model, model_order, rules, coord, mode=2):
    """Endlessly generate random users, writing each one to CSV.

    Both ``dataset_20M.csv`` (compact rows) and ``dataset_20M_bin.csv``
    (0/1 rows) are opened, and therefore truncated, as soon as the
    generator is first advanced, whatever *mode* is. They stay open until
    the generator is closed or garbage-collected.

    Args:
        model: mapping from dimension name to its categories / sub-dimensions.
        model_order: dimensions to generate, in generation order. Dimensions
            other than Age, Geography, CSP, Gender, Interests, Market intent
            and Funnel are silently skipped.
        rules: rule set handed to ``user.matches_rules``.
        coord: ``coord[dimension][value]`` gives the column index of a value.
        mode: 0 writes only the binary file, 1 only the compact file,
            2 (default, the previous hard-coded behaviour) writes both.

    Yields:
        Each generated ``User`` after its row(s) have been written.

    Raises:
        ValueError: if *mode* is not 0, 1 or 2 (raised on first iteration).
    """
    if mode not in (0, 1, 2):
        raise ValueError("mode must be 0, 1 or 2, got %r" % (mode,))

    val = 1  # value for interests / MI used when not projected
    multiple_val = set(['Interests', 'Market intent'])

    # NOTE: 'wb' is the csv idiom on Python 2; a Python 3 port would need
    # mode 'w' with newline='' instead.
    with open('dataset_20M.csv', 'wb') as f:
        wri = writer(f)
        with open('dataset_20M_bin.csv', 'wb') as f_b:
            wri_b = writer(f_b)
            while True:
                user = User()
                for dim in model_order:
                    if dim in ("Age", "Geography", "CSP", "Gender"):
                        # Single-valued dimension: draw one category from
                        # the space-separated distribution.
                        dist = [float(p) for p in
                                user.matches_rules(dim, rules).split(" ")]
                        cumDist = genCumDist(dist)
                        user[dim] = gen_non_bin_cat(model[dim], cumDist)
                    elif dim in ("Interests", "Market intent"):
                        # Multi-valued dimension: one independent draw per
                        # sub-dimension.
                        user[dim] = []
                        for ssdim in model[dim]:
                            dist = float(user.matches_rules(ssdim, rules))
                            if gen_bin_cat(dist):
                                user[dim].append(ssdim)
                    elif dim == "Funnel":
                        # The list must be created once, before the loop:
                        # resetting it per stage (as the code used to do)
                        # made the "Visitor"/"Client" prerequisites below
                        # impossible to satisfy.
                        user[dim] = []
                        for ssdim in model[dim]:
                            dist = float(user.matches_rules(ssdim, rules))
                            if ssdim in ("General population", "Visitor"):
                                eligible = True
                            elif ssdim == "Client":
                                eligible = "Visitor" in user[dim]
                            elif ssdim == "Loyal client":
                                eligible = "Client" in user[dim]
                            else:
                                eligible = False
                            # Draw only when the stage is reachable.
                            if eligible and gen_bin_cat(dist):
                                user[dim].append(ssdim)
                    # NOTE: "Customer segmentation", "Client purchasing
                    # categories" and "Performance" are not generated, and
                    # unknown dimensions are ignored rather than rejected.

                if mode in (1, 2):
                    wri.writerow(
                        _compact_row(user, coord, multiple_val, val))
                if mode in (0, 2):
                    wri_b.writerow(_binary_row(user, coord, multiple_val))
                yield user