# Assumes: from sklearn.datasets import make_blobs
#          from gensim.models import KeyedVectors
def __init__(self, model, population_size, n_blobs, n_features, home_district_in_position, iseed=None):
    self.model = model
    self.infected_blobs = []
    self.roulette_distribution = {}
    self.feature_vector = {}
    self.vector_to_human = {}
    self.vector_to_home = {}
    self.vector_to_classroom = {}
    self.vector_to_office = {}
    self.vector_to_restaurant = {}
    self.unit_info_map = self.unit_info_map()
    self.teachers_per_classroom = 1
    self.strid_to_human = self.strid_to_human()

    # Draw one feature vector per person, clustered into n_blobs Gaussian blobs.
    n_vec = population_size
    blobs, assignments = make_blobs(
        n_samples=n_vec,
        n_features=n_features,
        centers=n_blobs,
        cluster_std=0.1,
        center_box=(-10.0, 10.0),
        shuffle=False,
        random_state=iseed,
    )
    self.n_blobs = n_blobs
    self.home_district_in_position = home_district_in_position

    # Group the vectors by blob and keep a reverse lookup from vector to blob.
    self.blob_dict = {}
    self.vector_to_blob = {}
    for vec, assignment in zip(blobs, assignments):
        if assignment not in self.blob_dict:
            self.blob_dict[assignment] = []
        self.vector_to_blob[tuple(vec)] = assignment
        self.blob_dict[assignment].append(vec)
    self.vectors = blobs

    # For every vector, build a roulette-wheel distribution over all other
    # vectors, weighted by min-shifted cosine similarity and sorted by
    # descending weight.
    for i in range(n_vec):
        vector1 = self.vectors[i]
        tuple_vec1 = tuple(vector1)
        similarities = KeyedVectors.cosine_similarities(vector1, self.vectors)
        temp = {}
        sum_similarities = (similarities - similarities.min()).sum()
        for j in range(n_vec):
            if i != j:
                vector2 = self.vectors[j]
                tuple_vec2 = tuple(vector2)
                temp[tuple_vec2] = (similarities[j] - similarities.min()) / sum_similarities
        self.roulette_distribution[tuple_vec1] = dict(
            sorted(temp.items(), key=lambda item: -item[1])
        )
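# Illustrative sketch only: the helper below and its use of `random` are not
# part of the original class; it shows how the roulette_distribution built
# above could be sampled, picking a neighbouring vector with probability
# proportional to its similarity weight.
import random

def sample_neighbor(roulette_distribution, tuple_vec, rng=random):
    """Roulette-wheel selection over the pre-sorted neighbour weights."""
    weights = roulette_distribution[tuple_vec]
    total = sum(weights.values())      # weights need not sum exactly to 1
    r = rng.random() * total
    cumulative = 0.0
    for neighbor, weight in weights.items():
        cumulative += weight
        if r <= cumulative:
            return neighbor
    return neighbor                    # guard against floating-point round-off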
if sim.startswith('manual') and bias[0] != 'zero':
    # Restrict the candidate vocabulary to the labels defined for this bias.
    vocab = [word for word in bias_labels[bias[0]] if word in labels]
    vectors_all = np.asarray([word_vectors[word] for word in vocab])
    vector_1 = word_vectors[label]
    if sim.endswith('reject') and len(bias) > 2:
        # Remove the component along the bias direction (bias[1] - bias[2])
        # from both the candidate vectors and the query vector.
        bias_vector = word_vectors[bias[1]] - word_vectors[bias[2]]
        bias_vector /= np.linalg.norm(bias_vector)
        vectors_all -= np.outer(np.dot(vectors_all, bias_vector), bias_vector)
        vector_1 = vector_1 - np.dot(vector_1, bias_vector) * bias_vector
    # Rank candidates by cosine similarity, breaking ties in favour of the
    # label itself and against secondary labels, and keep the top-k.
    scores = word_vectors.cosine_similarities(vector_1, vectors_all)
    scores, vocab = zip(*sorted(
        zip(scores, vocab),
        key=lambda x: (-x[0], label != x[1], x[1] in secondary_labels)
    )[:topk])
    f.write('%s %s ' % (label, bias[0])
            + ' '.join('%.4f %s' % (scores[k], vocab[k]) for k in range(len(vocab)))
            + '\n')
    if scores[0] >= threshold:
        biased = [v for v, s in zip(vocab, scores) if s > threshold]
    elif len(bias) > 1:
        # Fall back to the remaining bias terms if nothing clears the threshold.
        biased = get_bias(label, bias[1:], sim, threshold)
row.append('/'.join(biased))
rows.append(row)
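# Illustrative sketch only: the standalone function below is not part of the
# original file; it isolates the vector rejection used in the 'reject' branch
# above, where every row of `vectors` loses its component along the bias
# direction so the subsequent cosine scores ignore that axis.
import numpy as np

def reject_bias_direction(vectors, bias_vector):
    """Remove the projection of each row of `vectors` onto `bias_vector`."""
    bias_vector = bias_vector / np.linalg.norm(bias_vector)
    projections = vectors @ bias_vector            # one scalar per row
    return vectors - np.outer(projections, bias_vector)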
def similarity(self, tup_vec1, tup_vec2):
    vec1 = np.array(list(tup_vec1))
    vec2 = np.array(list(tup_vec2))
    sim = KeyedVectors.cosine_similarities(vec1, [vec2])
    return sim[0]
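# Illustrative equivalent (assumption: plain NumPy, no gensim) of what
# KeyedVectors.cosine_similarities returns for a single pair of vectors,
# i.e. what `similarity` above computes for two tuple keys.
import numpy as np

def cosine_similarity(vec1, vec2):
    """Cosine of the angle between two 1-D vectors."""
    vec1 = np.asarray(vec1, dtype=float)
    vec2 = np.asarray(vec2, dtype=float)
    return float(np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2)))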