def cosim_search(e, q=None, qw=None):
    """Compute cosine similarities for an embedding table.

    With no query (``q`` or ``qw`` is ``None``) the table is compared
    against itself via ``cosim(e, e)``. Otherwise only the dimensions whose
    weight in ``qw`` is nonzero are kept, and the query is compared against
    every row of ``e`` on those dimensions.

    ``cosim`` is bound elsewhere in the file (presumably scikit-learn's
    ``cosine_similarity`` -- confirm against the imports).

    Args:
        e: Embedding table. In the query branch it must expose ``.values``
            returning a 2-D array (e.g. a pandas DataFrame).
        q: Query vector; must support indexing with a list of ints and
            ``reshape`` (presumably a 1-D NumPy array -- confirm with callers).
        qw: Per-dimension query weights; only whether each entry is nonzero
            is used.

    Returns:
        ``cosim(e, e)`` when no query is given, otherwise the flattened
        result of ``cosim`` between the masked query and masked table.
    """
    if q is None or qw is None:
        return cosim(e, e)

    idx = [i for i, v in enumerate(qw) if v != 0]
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray on both old and current pandas releases.
    e_idx = e.values[:, idx]
    q_idx = q[idx].reshape(1, -1)
    return cosim(q_idx, e_idx).flatten()
def cosim_search(e, q, qw):
    """Score a query against every row of an embedding table.

    Only the dimensions whose weight in ``qw`` is nonzero take part in the
    comparison. ``cosim`` is bound elsewhere in the file (presumably
    scikit-learn's ``cosine_similarity`` -- confirm against the imports).

    Args:
        e: Embedding table exposing ``.values`` as a 2-D array (e.g. a
            pandas DataFrame).
        q: Query vector; must support indexing with a list of ints and
            ``reshape`` (presumably a 1-D NumPy array -- confirm with callers).
        qw: Per-dimension query weights; only whether each entry is nonzero
            is used.

    Returns:
        The flattened result of ``cosim`` between the masked query and the
        masked table.
    """
    idx = [i for i, v in enumerate(qw) if v != 0]
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray on both old and current pandas releases.
    e_idx = e.values[:, idx]
    q_idx = q[idx].reshape(1, -1)
    return cosim(q_idx, e_idx).flatten()
def calc_cosine_similarity(data_matrix):
    """Compute the pairwise similarity matrix and print its off-diagonal range.

    Args:
        data_matrix: Input passed straight to ``cosim`` (bound elsewhere in
            the file; presumably scikit-learn's ``cosine_similarity``).

    Returns:
        The full matrix returned by ``cosim(data_matrix)``.
    """
    # Calculate cosine similarity matrix
    cosim_matrix = cosim(data_matrix)

    # Entries strictly above the diagonal (k=1 skips the diagonal itself)
    upper_triangle = cosim_matrix[np.triu_indices_from(cosim_matrix, k=1)]

    # A single-row input has no off-diagonal entries; np.amin/np.amax would
    # raise ValueError on the empty array, so only report when there is data.
    if upper_triangle.size:
        # Minimum and maximum over the off-diagonal entries (no zero filter)
        print("Cosim Matrix Minimum: " + str(np.amin(upper_triangle)))
        print("Cosim Matrix Maximum: " + str(np.amax(upper_triangle)))

    return cosim_matrix
def response(user_response):
    """Return the stored sentence that best matches ``user_response``.

    The message is appended to ``ChatResponse.sent_token`` (and is not
    removed here), the whole list is TF-IDF vectorised, and the last row
    (the message just appended) is compared against every row with
    ``cosim``. The second-highest score is used; the top score is skipped,
    presumably because it is the message matching itself.

    Args:
        user_response: The user's message text.

    Returns:
        The sentence with the second-highest score, or a fixed apology
        string when that score is zero.
    """
    ChatResponse.sent_token.append(user_response)

    vectorizer = tfvec(tokenizer=lemitizer.LemNormalize, stop_words='english')
    matrix = vectorizer.fit_transform(ChatResponse.sent_token)
    scores = cosim(matrix[-1], matrix)

    # Index of the runner-up score in the single result row.
    runner_up = scores.argsort()[0][-2]

    # Value of the runner-up score, taken from a sorted copy.
    ordered = scores.flatten()
    ordered.sort()

    if ordered[-2] == 0:
        return "I am sorry! I don't understand you"
    return ChatResponse.sent_token[runner_up]
def cosine_sim(x):
    """Score each sentence embedding in ``x`` against the question embedding.

    Args:
        x: Mapping with ``"sent_emb"`` and ``"quest_emb"`` entries. The items
            of ``x["sent_emb"][0]`` are each compared with
            ``x["quest_emb"][0][0]``.

    Returns:
        A list with one ``cosim`` result per item, in iteration order.
    """
    # The question lookup stays inside the comprehension so that it is only
    # evaluated when there is at least one sentence to score.
    return [
        cosim(sentence, x["quest_emb"][0][0])
        for sentence in x["sent_emb"][0]
    ]
from sklearn.metrics.pairwise import cosine_similarity as cosim

# Pairwise Dice coefficient, 2*|A & B| / (|A| + |B|), between the rows of
# compBool, each row being cast to a boolean mask. Two all-False rows are
# defined as fully similar (1).
n_comp = np.size(compMat, 0)
compSim = np.zeros([n_comp, n_comp])
for i in range(n_comp):
    print(i)
    # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
    # Row i is loop-invariant for the inner loop, so build its mask once.
    a1 = np.asarray(compBool[i, :]).astype(bool)
    for j in range(n_comp):
        a2 = np.asarray(compBool[j, :]).astype(bool)
        total = a1.sum() + a2.sum()
        if total == 0:
            compSim[i, j] = 1
        else:
            intersection = np.logical_and(a1, a2)
            compSim[i, j] = 2. * intersection.sum() / total
    # NOTE(review): the original indentation was lost; this progress print is
    # assumed to run once per outer iteration -- confirm.
    print(compSim[1, i])

# Similarity between the rows of kinaseFeat.
kinaseSim = cosim(kinaseFeat)
nt = len(kinaseSim)

#%% Clustering
import matplotlib.pyplot as plt
from scipy.spatial.distance import squareform
from scipy.spatial.distance import pdist

# Condensed pairwise Hamming distances between the rows of compMat.
d = pdist(compMat, metric='hamming')

from scipy.cluster.hierarchy import linkage

# Complete-linkage hierarchical clustering on those distances.
z = linkage(d, method='complete')

from scipy.cluster.hierarchy import dendrogram

plt.figure()
dn = dendrogram(z)
import embed_functions as emb
from glob import glob


def cosim_search(e, q=None, qw=None):
    """Compute cosine similarities for an embedding table.

    With no query (``q`` or ``qw`` is ``None``) the table is compared
    against itself via ``cosim(e, e)``. Otherwise only the dimensions whose
    weight in ``qw`` is nonzero are kept, and the query is compared against
    every row of ``e`` on those dimensions.

    Args:
        e: Embedding table. In the query branch it must expose ``.values``
            returning a 2-D array (e.g. a pandas DataFrame).
        q: Query vector; must support indexing with a list of ints and
            ``reshape`` (presumably a 1-D NumPy array -- confirm with callers).
        qw: Per-dimension query weights; only whether each entry is nonzero
            is used.

    Returns:
        ``cosim(e, e)`` when no query is given, otherwise the flattened
        result of ``cosim`` between the masked query and masked table.
    """
    if q is None or qw is None:
        return cosim(e, e)

    idx = [i for i, v in enumerate(qw) if v != 0]
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray on both old and current pandas releases.
    e_idx = e.values[:, idx]
    q_idx = q[idx].reshape(1, -1)
    return cosim(q_idx, e_idx).flatten()


path_work = '/mnt/HA/groups/rosenGrp/embed_cons/out/kegg'

params = ['6_256_5_50_10_1e-06_100', '10_256_5_50_10_1e-06_100']
suffix = ['remb.csv.gz', 'remb_raw.csv.gz']

# For every parameter set and file suffix, load the two matching embedding
# tables and write their cross cosine-similarity matrix to CSV.
for p in params:
    path_emb = glob(path_work + '/*' + p + '*.csv.gz')
    for suf in suffix:
        # Assumes exactly two files match each (p, suf) pair -- TODO confirm.
        mods = [path for path in path_emb if suf in path]
        # Put the path containing 'vcons' first so it is loaded as remb_cons.
        if 'vcons' not in mods[0]:
            mods = [mods[1], mods[0]]
        remb_cons = pd.read_csv(mods[0], index_col=0)
        remb_clust = pd.read_csv(mods[1], index_col=0)
        # DataFrame.as_matrix() was removed in pandas 1.0; use .values.
        sim = pd.DataFrame(cosim(remb_cons.values, remb_clust.values),
                           index=remb_cons.index, columns=remb_clust.index)
        # NOTE(review): the output name hard-codes '_1e-05_' while both
        # entries in params contain '1e-06' -- confirm this is intended.
        sim.to_csv(os.path.join(path_work, 'cosim_cons_' + p + '_1e-05_' + suf))