Example #1
0
class UserTests(unittest2.TestCase):
    """Tests for ``User``: ids, demographic profiles and item-based profiles.

    One ``User`` and one sample index are built in ``setUpClass`` and shared
    by all tests, so tests must not leave lasting changes on them.
    """

    @classmethod
    def setUpClass(cls):
        # Shared fixtures, created once for the whole test case.
        cfg = Config()
        cls.axi = xapian.Database(cfg.axi)
        packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
                    "festival","file","inkscape","xpdf"]
        path = "test_data/.sample_axi"
        cls.sample_axi = SampleAptXapianIndex(packages,cls.axi,path)
        cls.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})

    def test_hash(self):
        # A freshly built user gets an id, and it differs from the fixture's.
        new_user = User(dict())
        self.assertIsNotNone(new_user.id)
        self.assertNotEqual(self.user.id, new_user.id)

    def test_profile_default(self):
        # With no explicit profile, the expected tags are the same set that
        # test_profile_desktop expects for "desktop".
        new_user = User(dict())
        desktop = set(["x11", "accessibility", "game", "junior", "office",
                       "interface::x11"])
        self.assertEqual(new_user.demographic_profile,desktop)

    def test_profile_desktop(self):
        self.user.set_demographic_profile(set(["desktop"]))
        desktop = set(["x11", "accessibility", "game", "junior", "office",
                       "interface::x11"])
        self.assertEqual(self.user.demographic_profile,desktop)

    def test_profile_admin(self):
        self.user.set_demographic_profile(set(["admin"]))
        admin = set(["admin", "hardware", "mail", "protocol",
                     "network", "security", "web", "interface::web"])
        self.assertEqual(self.user.demographic_profile,admin)

    def test_profile_devel(self):
        self.user.set_demographic_profile(set(["devel"]))
        devel = set(["devel", "role::devel-lib", "role::shared-lib"])
        self.assertEqual(self.user.demographic_profile,devel)

    def test_profile_art(self):
        self.user.set_demographic_profile(set(["art"]))
        art = set(["field::arts", "sound"])
        self.assertEqual(self.user.demographic_profile,art)

    def test_profile_science(self):
        self.user.set_demographic_profile(set(["science"]))
        science = set(["science", "biology", "field::astronomy",
                       "field::aviation",  "field::biology",
                       "field::chemistry", "field::eletronics",
                       "field::finance", "field::geography",
                       "field::geology", "field::linguistics",
                       "field::mathematics", "field::medicine",
                       "field::meteorology", "field::physics",
                       "field::statistics"])
        self.assertEqual(self.user.demographic_profile,science)

    def test_multi_profile(self):
        # Several profile names at once: the expected result is the union of
        # the tag sets checked individually in the tests above.
        self.user.set_demographic_profile(set(["devel","art"]))
        devel_art = set(["devel", "role::devel-lib", "role::shared-lib",
                         "field::arts", "sound"])
        self.assertEqual(self.user.demographic_profile,devel_art)

        self.user.set_demographic_profile(set(["art","admin","desktop"]))
        desktop_art_admin = set(["x11", "accessibility", "game", "junior",
                                 "office", "interface::x11", "field::arts",
                                 "sound", "admin", "hardware", "mail",
                                 "protocol", "network", "security", "web",
                                 "interface::web"])
        self.assertEqual(self.user.demographic_profile,desktop_art_admin)

    def test_items(self):
        self.assertEqual(set(self.user.items()),
                         set(["gimp","aaphoto","eog","emacs"]))

    def test_profile(self):
        # profile() with a content-type name must match the dedicated method.
        self.assertEqual(self.user.profile(self.sample_axi,"tag",10),
                         self.user.tag_profile(self.sample_axi,10))
        self.assertEqual(self.user.profile(self.sample_axi,"desc",10),
                         self.user.desc_profile(self.sample_axi,10))
        self.assertEqual(self.user.profile(self.sample_axi,"full",10),
                         self.user.full_profile(self.sample_axi,10))

    def test_tag_profile(self):
        self.assertEqual(self.user.tag_profile(self.sample_axi,1),
                         ['XTuse::editing'])

    def test_desc_profile(self):
        self.assertEqual(self.user.desc_profile(self.sample_axi,1),
                         ['image'])

    def test_full_profile(self):
        # A full profile of size 10 is expected to be 5 tag terms followed by
        # 5 description terms.
        self.assertEqual(self.user.full_profile(self.sample_axi,10),
                         (self.user.tag_profile(self.sample_axi,5)+
                          self.user.desc_profile(self.sample_axi,5)))

    def test_maximal_pkg_profile(self):
        # Use a private User with the same items as the shared fixture: this
        # test writes extra packages into item_score, and doing that on
        # self.user would leak into every test that runs afterwards.
        user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})
        old_pkg_profile = user.items()
        aaphoto_deps = ["libc6", "libgomp1", "libjasper1", "libjpeg62",
                        "libpng12-0"]
        libc6_deps = ["libc-bin", "libgcc1"]

        for pkg in aaphoto_deps+libc6_deps:
            user.item_score[pkg] = 1

        # The added dependency packages must not appear in the maximal profile.
        self.assertEqual(old_pkg_profile,user.maximal_pkg_profile())
Example #2
0
# Single-category dimensions: name -> (column in the compact row,
# offset of the dimension's 0/1 group in the binary row).
_XUSER_SINGLE_DIMS = {
    'Gender': (0, 0),
    'Age': (1, 2),
    'CSP': (2, 9),
    'Geography': (3, 16),
}
# Dimensions whose value is a list of categories.
_XUSER_MULTI_DIMS = frozenset(['Interests', 'Market intent'])


def _xuser_binary_row(user, coord):
    """Return the 46-column 0/1 row for *user* (one column per category)."""
    row = [0] * (26 + 20)
    for key, value in user.items():
        if key in _XUSER_MULTI_DIMS:
            for entry in value:
                row[coord[key][entry] + 20] = 1
        elif key in _XUSER_SINGLE_DIMS:
            offset = _XUSER_SINGLE_DIMS[key][1]
            row[offset + coord[key][value]] = 1
    return row


def _xuser_compact_row(user, coord, val):
    """Return the 30-column row: 4 category indices, then *val* per list entry."""
    row = [0] * (26 + 4)
    for key, value in user.items():
        if key in _XUSER_MULTI_DIMS:
            for entry in value:
                row[coord[key][entry] + 4] = val
        elif key in _XUSER_SINGLE_DIMS:
            column = _XUSER_SINGLE_DIMS[key][0]
            row[column] = coord[key][value]
    return row


def xuser(model, model_order, rules, coord, mode=2):
    """Generate users endlessly, writing each one to CSV and yielding it.

    For every user, the dimensions in *model_order* are filled in order:

    * "Age", "Geography", "CSP", "Gender": one category, chosen with
      ``gen_non_bin_cat`` from the distribution ``user.matches_rules``
      returns as a space-separated string of floats.
    * "Interests", "Market intent": a list of the sub-dimensions of
      ``model[dim]`` for which ``gen_bin_cat`` succeeds.
    * "Funnel": a list of stages; "Client" is only tried once "Visitor" has
      been kept, and "Loyal client" only once "Client" has been kept.

    Any other dimension is skipped.

    Args:
        model: mapping from dimension name to its categories/sub-dimensions.
        model_order: iterable of dimension names, in generation order.
        rules: passed through to ``user.matches_rules``.
        coord: ``coord[dim][category]`` gives the category's integer index.
        mode: 0 writes only the binary rows to ``dataset_20M_bin.csv``,
            1 writes only the compact rows to ``dataset_20M.csv``,
            2 (default) writes both.

    Yields:
        Each generated ``User``, after its row(s) have been written.

    Raises:
        ValueError: if *mode* is not 0, 1 or 2 (raised on first iteration,
            since this is a generator).
    """
    if mode not in (0, 1, 2):
        raise ValueError("mode must be 0, 1 or 2, got %r" % (mode,))

    val = 1  # value stored for interests / market intent in the compact row

    # Both files are opened (and truncated) whatever the mode, as before.
    # NOTE: 'wb' is the Python 2 convention for csv; under Python 3 the csv
    # module needs text mode with newline=''.
    with open('dataset_20M.csv', 'wb') as f:
        wri = writer(f)
        with open('dataset_20M_bin.csv', 'wb') as f_b:
            wri_b = writer(f_b)
            while True:
                user = User()
                for dim in model_order:
                    if dim in ("Age", "Geography", "CSP", "Gender"):
                        raw = user.matches_rules(dim, rules)
                        dist = [float(x) for x in raw.split(" ")]
                        cumDist = genCumDist(dist)
                        user[dim] = gen_non_bin_cat(model[dim], cumDist)
                    elif dim in ("Interests", "Market intent"):
                        user[dim] = []
                        for ssdim in model[dim]:
                            dist = float(user.matches_rules(ssdim, rules))
                            if gen_bin_cat(dist):
                                user[dim].append(ssdim)
                    elif dim == "Funnel":
                        # Initialise once, before the loop: resetting the
                        # list per stage would make the "Visitor"/"Client"
                        # prerequisites below impossible to satisfy.
                        user[dim] = []
                        # The prerequisite chain relies on model[dim]
                        # yielding stages in funnel order.
                        for ssdim in model[dim]:
                            dist = float(user.matches_rules(ssdim, rules))
                            if ssdim in ["General population", "Visitor"] and gen_bin_cat(dist):
                                user[dim].append(ssdim)
                            elif ssdim == "Client" and "Visitor" in user[dim] and gen_bin_cat(dist):
                                user[dim].append(ssdim)
                            elif ssdim == "Loyal client" and "Client" in user[dim] and gen_bin_cat(dist):
                                user[dim].append(ssdim)
                    # "Customer segmentation", "Client purchasing categories"
                    # and "Performance" are currently not generated; those
                    # and any unknown dimension are silently skipped.

                # Compact row first, then binary row, matching the original
                # write order when both are produced.
                if mode in (1, 2):
                    wri.writerow(_xuser_compact_row(user, coord, val))
                if mode in (0, 2):
                    wri_b.writerow(_xuser_binary_row(user, coord))
                yield user