Esempio n. 1
0
	def f4(self, pos_tuples, neg_tuples, msg_attr_index):
		"""Pick the message-attribute value with the best net weighted support.

		Every tuple is unpacked as ``(u, m, r, d)``; ``m[msg_attr_index]`` is
		the attribute value and ``d`` contributes a weight of ``1 / (d + 1)``
		(presumably ``d`` is a distance -- confirm against the caller).

		For each distinct value found in ``pos_tuples`` the score is
		``count * sum(weights)`` over the matching positive tuples minus the
		same quantity over the matching negative tuples.

		Returns the value of the first pair handed back by ``ut.top_n``.
		Raises ``IndexError`` when ``ut.top_n`` returns nothing (for example
		when ``pos_tuples`` is empty).
		"""
		def weighted_value(value, tuples):
			# One pass collects the weights of the matching tuples; their
			# number doubles as the match count.
			weights = [
				1.0 / float(d + 1)
				for (_u, m, _r, d) in tuples
				if m[msg_attr_index] == value
			]
			return len(weights) * sum(weights)

		distinct_pos = set(m[msg_attr_index] for (_u, m, _r, _d) in pos_tuples)
		scored = [
			(value, weighted_value(value, pos_tuples) - weighted_value(value, neg_tuples))
			for value in distinct_pos
		]
		return ut.top_n(scored, 1, lambda pair: pair[1])[0][0]
Esempio n. 2
0
def build_weighted_max_pos_proportion_selector(weight_f = lambda x: 1):
	"""Build a selector for the key with the highest weighted positive share.

	``weighted_agg(rows, weight_f)`` is used as a mapping from key to an
	aggregated weight: it is indexed by key, iterated through ``keys()`` and
	tested with ``in``.

	The returned callable takes ``(p, n)`` -- the positive and negative rows --
	and returns the first element produced by ``ut.top_n``, i.e. a
	``(key, proportion)`` pair rather than the bare key.
	"""
	def proportion(key, pv, nv):
		# A key that never occurs on the negative side is entirely positive.
		if key not in nv:
			return 1.0
		# The denominator must be parenthesised: share = pos / (pos + neg).
		return float(pv[key]) / (float(pv[key]) + float(nv[key]))

	def wps(pos, neg):
		pv = weighted_agg(pos, weight_f)
		nv = weighted_agg(neg, weight_f)
		return [(key, proportion(key, pv, nv)) for key in pv.keys()]

	return lambda p, n: ut.top_n(wps(p, n), 1, lambda pair: pair[1])[0]
def build_top_n_optimizer(users, inters, resps, top_n, model_builder, relative_percent_compare=False):
	"""Build a function choosing, for an input ``x``, one of the top interactions.

	``users``, ``inters`` and ``resps`` are zipped into ``(user, inter, resp)``
	rows. ``inters`` is ranked with ``ut.top_n`` either by the summed response
	per interaction or, when ``relative_percent_compare`` is true, by that sum
	divided by the number of rows for the interaction. ``inters`` is ranked as
	given; duplicates are not removed here.

	All rows are handed to ``model_builder.set_data_rows``. The returned
	callable takes ``x`` and returns the candidate among the top interactions
	that ``ut.top_n`` ranks first under ``model_builder.prob_f()(x, candidate)``.
	"""
	# Materialise the rows: they are scanned repeatedly below, and zip() is a
	# one-shot iterator on Python 3.
	tups = list(zip(users, inters, resps))

	def pos_count(inter):
		# Sum of the responses recorded for this interaction.
		return sum(row[2] for row in tups if inter == row[1])

	def rel_pos_perc(inter):
		# Response sum relative to the number of rows for this interaction.
		matching = [row for row in tups if inter == row[1]]
		if not matching:
			# Can only happen when zip() truncated away this interaction.
			return 0.0
		return float(sum(row[2] for row in matching)) / float(len(matching))

	rank_key = rel_pos_perc if relative_percent_compare else pos_count
	top_inters = ut.top_n(inters, top_n, rank_key)

	# Debug output: the first few rows belonging to the kept interactions.
	top_tups = [row for row in tups if row[1] in top_inters]
	print(top_tups[:5])

	model_builder.set_data_rows(tups)

	def choose(x):
		# Fetch the probability function once per call, not once per candidate.
		prob = model_builder.prob_f()
		return ut.top_n(top_inters, 1, lambda y: prob(x, y))[0]

	return choose
def n_best_inters(users, inters, responses, n):
    """Rank interactions by their summed response.

    ``users``, ``inters`` and ``responses`` are zipped into rows; each entry
    of ``inters`` is paired with the sum of the responses of the rows whose
    interaction equals it.

    Returns whatever ``ut.top_n`` yields for the ``(interaction, total)``
    pairs, ``n`` requested, keyed on the total.
    """
    # Materialise once: the rows are re-scanned for every interaction and
    # zip() is a one-shot iterator on Python 3.
    rows = list(zip(users, inters, responses))

    def mcount(msg):
        return sum(row[2] for row in rows if row[1] == msg)

    results = [(msg, mcount(msg)) for msg in inters]
    return ut.top_n(results, n, lambda pair: pair[1])
def n_best_messages(users, data_gen, msgs, n):
	"""Return the messages ranked best by summed response.

	When ``msgs`` is an integer it is first replaced by
	``data_gen.gen_random_inters(msgs)``. The result of
	``data_gen.gen_crossprod_rows(users, msgs)`` is transposed with ``zip(*...)``
	into rows whose index 1 is compared with the message and whose index 2 is
	summed (presumably user / message / response columns -- confirm against
	``data_gen``).

	Returns the messages only (totals dropped) from ``ut.top_n`` with ``n``
	requested.
	"""
	if isinstance(msgs, int):
		msgs = data_gen.gen_random_inters(msgs)

	# Materialise once: the rows are re-scanned for every message and zip()
	# is a one-shot iterator on Python 3.
	rows = list(zip(*data_gen.gen_crossprod_rows(users, msgs)))

	def mcount(msg):
		return sum(row[2] for row in rows if row[1] == msg)

	results = [(msg, mcount(msg)) for msg in msgs]
	return [msg for (msg, _total) in ut.top_n(results, n, lambda pair: pair[1])]
Esempio n. 6
0
	def knn(self, user, k):
		"""Return the cached ``(pos, neg)`` neighbour summary for ``user``.

		On a cache miss every ``(u, p, n)`` entry of ``self.data`` is extended
		with ``self.similarity_f(user, u)``, ``ut.top_n`` is asked for ``k``
		entries keyed on that similarity, and the ``(p, similarity)`` and
		``(n, similarity)`` pairs of the result are each passed through
		``self.normalize``. The pair of normalised results is stored in
		``self.cache`` under ``user`` and returned.
		"""
		# NOTE(review): the cache key is `user` alone, so a later call with a
		# different `k` gets the result computed for the first `k`.
		if user not in self.cache:
			scored = [
				(u, p, n, self.similarity_f(user, u))
				for (u, p, n) in self.data
			]
			nearest = ut.top_n(scored, k, lambda row: row[3])
			pos = self.normalize([(p, s) for (_u, p, _n, s) in nearest])
			neg = self.normalize([(n, s) for (_u, _p, n, s) in nearest])
			self.cache[user] = (pos, neg)
		return self.cache[user]
Esempio n. 7
0
	def knn(self, user, k, resp = None):
		"""Return the ``k`` rows ranked best by inverse distance to ``user``.

		``resp`` selects the row pool: ``1`` uses ``self.positives``, ``0`` uses
		``self.negatives``, anything else uses ``self.data_rows``. Each
		``(u, m, r)`` row is extended with ``self.distance_f(user, u)`` and the
		rows are ranked through ``ut.top_n`` with key ``1 / (d + 1)``, so a
		smaller non-negative distance gives a larger key.
		"""
		rows = self.data_rows
		if resp == 1:
			rows = self.positives
		elif resp == 0:
			rows = self.negatives
		dists = [(u, m, r, self.distance_f(user, u)) for (u, m, r) in rows]
		# Pass `k` as the count: every other ut.top_n call site uses the
		# (items, n, key) argument order.
		return ut.top_n(dists, k, lambda row: 1.0 / float(row[3] + 1))
Esempio n. 8
0
 def f4(self, pos_tuples, neg_tuples, msg_attr_index):
     """Pick the message-attribute value with the best net weighted support.

     Every tuple is unpacked as ``(u, m, r, d)``; ``m[msg_attr_index]`` is the
     attribute value and ``d`` contributes a weight of ``1 / (d + 1)``
     (presumably ``d`` is a distance -- confirm against the caller).

     For each distinct value found in ``pos_tuples`` the score is
     ``count * sum(weights)`` over the matching positive tuples minus the
     same quantity over the matching negative tuples.

     Returns the value of the first pair handed back by ``ut.top_n``.
     Raises ``IndexError`` when ``ut.top_n`` returns nothing (for example
     when ``pos_tuples`` is empty).
     """
     def weighted_value(value, tuples):
         # One pass collects the weights of the matching tuples; their
         # number doubles as the match count.
         weights = [
             1.0 / float(d + 1)
             for (_u, m, _r, d) in tuples
             if m[msg_attr_index] == value
         ]
         return len(weights) * sum(weights)

     distinct_pos = set(m[msg_attr_index] for (_u, m, _r, _d) in pos_tuples)
     scored = [
         (value,
          weighted_value(value, pos_tuples) - weighted_value(value, neg_tuples))
         for value in distinct_pos
     ]
     return ut.top_n(scored, 1, lambda pair: pair[1])[0][0]
Esempio n. 9
0
def n_best_messages(users, data_gen, msgs, n):
    """Return the messages ranked best by summed response.

    When ``msgs`` is an integer it is first replaced by
    ``data_gen.gen_random_inters(msgs)``. The result of
    ``data_gen.gen_crossprod_rows(users, msgs)`` is transposed with
    ``zip(*...)`` into rows whose index 1 is compared with the message and
    whose index 2 is summed (presumably user / message / response columns --
    confirm against ``data_gen``).

    Returns the messages only (totals dropped) from ``ut.top_n`` with ``n``
    requested.
    """
    if isinstance(msgs, int):
        msgs = data_gen.gen_random_inters(msgs)

    # Materialise once: the rows are re-scanned for every message and zip()
    # is a one-shot iterator on Python 3.
    rows = list(zip(*data_gen.gen_crossprod_rows(users, msgs)))

    def mcount(msg):
        return sum(row[2] for row in rows if row[1] == msg)

    results = [(msg, mcount(msg)) for msg in msgs]
    ranked = ut.top_n(results, n, lambda pair: pair[1])
    return [msg for (msg, _total) in ranked]
Esempio n. 10
0
def build_top_n_optimizer(users,
                          inters,
                          resps,
                          top_n,
                          model_builder,
                          relative_percent_compare=False):
    """Build a function choosing, for an input ``x``, one of the top interactions.

    ``users``, ``inters`` and ``resps`` are zipped into
    ``(user, inter, resp)`` rows. ``inters`` is ranked with ``ut.top_n``
    either by the summed response per interaction or, when
    ``relative_percent_compare`` is true, by that sum divided by the number
    of rows for the interaction. ``inters`` is ranked as given; duplicates
    are not removed here.

    All rows are handed to ``model_builder.set_data_rows``. The returned
    callable takes ``x`` and returns the candidate among the top interactions
    that ``ut.top_n`` ranks first under
    ``model_builder.prob_f()(x, candidate)``.
    """
    # Materialise the rows: they are scanned repeatedly below, and zip() is
    # a one-shot iterator on Python 3.
    tups = list(zip(users, inters, resps))

    def pos_count(inter):
        # Sum of the responses recorded for this interaction.
        return sum(row[2] for row in tups if inter == row[1])

    def rel_pos_perc(inter):
        # Response sum relative to the number of rows for this interaction.
        matching = [row for row in tups if inter == row[1]]
        if not matching:
            # Can only happen when zip() truncated away this interaction.
            return 0.0
        return float(sum(row[2] for row in matching)) / float(len(matching))

    rank_key = rel_pos_perc if relative_percent_compare else pos_count
    top_inters = ut.top_n(inters, top_n, rank_key)

    # Debug output: the first few rows belonging to the kept interactions.
    top_tups = [row for row in tups if row[1] in top_inters]
    print(top_tups[:5])

    model_builder.set_data_rows(tups)

    def choose(x):
        # Fetch the probability function once per call, not once per
        # candidate.
        prob = model_builder.prob_f()
        return ut.top_n(top_inters, 1, lambda y: prob(x, y))[0]

    return choose
Esempio n. 11
0
def build_weighted_mode_selector(weight_f = lambda x: 1):
	"""Build a selector returning the key with the largest weighted total.

	The returned callable takes ``(pos, neg)``; only ``pos`` is used. It
	aggregates ``pos`` with ``weighted_agg(pos, weight_f)``, ranks the
	``(key, total)`` items through ``ut.top_n`` keyed on the total, and
	returns the key of the first item.
	"""
	def select(pos, neg):
		totals = weighted_agg(pos, weight_f)
		# list(...) so ut.top_n receives a real sequence on Python 3 as well.
		return ut.top_n(list(totals.items()), 1, lambda pair: pair[1])[0][0]

	return select
Esempio n. 12
0
def max_ratio_occurrences_in(in_list, out_list):
    """Return the element of ``in_list`` with the best in/out occurrence ratio.

    Each element is scored as ``(1 + count_in(x, in_list)) /
    (1 + count_in(x, out_list))``; the element of the first pair returned by
    ``ut.top_n`` is the result.
    """
    def smoothed_ratio(item):
        inside = float(1 + count_in(item, in_list))
        outside = float(1 + count_in(item, out_list))
        return inside / outside

    scored = [(item, smoothed_ratio(item)) for item in in_list]
    winner = ut.top_n(scored, 1, lambda pair: pair[1])[0]
    return winner[0]
Esempio n. 13
0
def max_net_occurrences_in(in_list, out_list):
    """Return the element of ``in_list`` with the best net occurrence count.

    Each element is scored as ``count_in(x, in_list) - count_in(x, out_list)``;
    the element of the first pair returned by ``ut.top_n`` is the result.
    """
    def net_count(item):
        return count_in(item, in_list) - count_in(item, out_list)

    scored = [(item, net_count(item)) for item in in_list]
    winner = ut.top_n(scored, 1, lambda pair: pair[1])[0]
    return winner[0]
Esempio n. 14
0
def mode(vals):
    """Return the value of ``vals`` ranked first by its ``count_in`` count.

    Every value is paired with ``count_in(value, vals)`` and the pairs are
    ranked through ``ut.top_n`` keyed on that count.
    """
    counted = [(value, count_in(value, vals)) for value in vals]
    winner = ut.top_n(counted, 1, lambda pair: pair[1])[0]
    return winner[0]
def n_best_inters(users, inters, responses, n):
	"""Rank interactions by their summed response.

	``users``, ``inters`` and ``responses`` are zipped into rows; each entry
	of ``inters`` is paired with the sum of the responses of the rows whose
	interaction equals it.

	Returns whatever ``ut.top_n`` yields for the ``(interaction, total)``
	pairs, ``n`` requested, keyed on the total.
	"""
	# Materialise once: the rows are re-scanned for every interaction and
	# zip() is a one-shot iterator on Python 3.
	rows = list(zip(users, inters, responses))

	def mcount(msg):
		return sum(row[2] for row in rows if row[1] == msg)

	results = [(msg, mcount(msg)) for msg in inters]
	return ut.top_n(results, n, lambda pair: pair[1])
Esempio n. 16
0
def max_ratio_occurrences_in(in_list, out_list):
	"""Return the element of ``in_list`` with the best in/out occurrence ratio.

	Each element is scored as ``(1 + count_in(x, in_list)) /
	(1 + count_in(x, out_list))``; the element of the first pair returned by
	``ut.top_n`` is the result.
	"""
	def smoothed_ratio(item):
		inside = float(1 + count_in(item, in_list))
		outside = float(1 + count_in(item, out_list))
		return inside / outside

	scored = [(item, smoothed_ratio(item)) for item in in_list]
	winner = ut.top_n(scored, 1, lambda pair: pair[1])[0]
	return winner[0]
Esempio n. 17
0
def max_net_occurrences_in(in_list, out_list):
	"""Return the element of ``in_list`` with the best net occurrence count.

	Each element is scored as ``count_in(x, in_list) - count_in(x, out_list)``;
	the element of the first pair returned by ``ut.top_n`` is the result.
	"""
	def net_count(item):
		return count_in(item, in_list) - count_in(item, out_list)

	scored = [(item, net_count(item)) for item in in_list]
	winner = ut.top_n(scored, 1, lambda pair: pair[1])[0]
	return winner[0]
Esempio n. 18
0
def mode(vals):
	"""Return the value of ``vals`` ranked first by its ``count_in`` count.

	Every value is paired with ``count_in(value, vals)`` and the pairs are
	ranked through ``ut.top_n`` keyed on that count.
	"""
	counted = [(value, count_in(value, vals)) for value in vals]
	winner = ut.top_n(counted, 1, lambda pair: pair[1])[0]
	return winner[0]
def n_best_messages(datagen, users, messages, n):
	"""Rank ``messages`` by the summed third column of their generated rows.

	For each message, ``datagen.gen_crossprod_rows(users, [message])`` is
	called and the element at index 2 of its result is summed (presumably the
	response column -- confirm against ``datagen``).

	Returns whatever ``ut.top_n`` yields for the ``(message, total)`` pairs,
	``n`` requested, keyed on the total.
	"""
	def response_total(message):
		generated = datagen.gen_crossprod_rows(users, [message])
		return sum(generated[2])

	scored = [(message, response_total(message)) for message in messages]
	return ut.top_n(scored, n, lambda pair: pair[1])