Example #1
0
def cosim_search(e, q=None, qw=None):
    """Cosine similarity among the rows of ``e``, or between ``e`` and a query.

    Parameters
    ----------
    e : embedding table. When a query is supplied it must expose ``.values``
        (e.g. a pandas DataFrame); without a query it is handed to ``cosim``
        unchanged.
    q : query vector, indexed with a list of positions and then reshaped to a
        single row -- presumably a 1-D NumPy array (confirm against callers).
    qw : iterable of query weights; positions whose weight is non-zero select
        the columns that take part in the comparison.

    Returns
    -------
    ``cosim(e, e)`` when ``q`` or ``qw`` is None; otherwise the flattened
    result of ``cosim`` between the reduced query and the reduced rows of ``e``.
    """
    if q is None or qw is None:
        return cosim(e, e)

    # Keep only the feature positions that carry a non-zero query weight.
    idx = [i for i, v in enumerate(qw) if v != 0]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # on both old and current pandas versions.
    e_idx = e.values[:, idx]
    q_idx = q[idx].reshape(1, -1)
    return cosim(q_idx, e_idx).flatten()
def cosim_search(e, q, qw):
    """Cosine similarity between a query and the rows of ``e`` on weighted columns.

    Parameters
    ----------
    e : embedding table exposing ``.values`` (e.g. a pandas DataFrame).
    q : query vector, indexed with a list of positions and then reshaped to a
        single row -- presumably a 1-D NumPy array (confirm against callers).
    qw : iterable of query weights; positions whose weight is non-zero select
        the columns that take part in the comparison.

    Returns
    -------
    The flattened result of ``cosim`` between the reduced query and the
    reduced rows of ``e``.
    """
    # Keep only the feature positions that carry a non-zero query weight.
    idx = [i for i, v in enumerate(qw) if v != 0]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # on both old and current pandas versions.
    e_idx = e.values[:, idx]
    q_idx = q[idx].reshape(1, -1)

    return cosim(q_idx, e_idx).flatten()
def calc_cosine_similarity(data_matrix):
    """Build the cosine-similarity matrix of ``data_matrix`` and print its range.

    The matrix is whatever ``cosim(data_matrix)`` returns. The smallest and
    largest entries lying strictly above the diagonal are printed; no sign
    filtering is applied to them.

    Returns the full similarity matrix.
    """
    similarities = cosim(data_matrix)

    # k=1 starts one step above the diagonal, so the diagonal is excluded.
    rows, cols = np.triu_indices_from(similarities, k=1)
    off_diagonal = similarities[rows, cols]

    lowest = np.amin(off_diagonal)
    highest = np.amax(off_diagonal)
    print("Cosim Matrix Minimum: " + str(lowest))
    print("Cosim Matrix Maximum: " + str(highest))

    return similarities
Example #4
0
 def response(user_response):
     """Pick the stored sentence most similar to ``user_response``.

     ``user_response`` is appended to ``ChatResponse.sent_token`` (and is not
     removed here). All stored sentences are then TF-IDF vectorised and the
     last one -- the message just appended -- is compared against every row.
     The second-highest score is used; the highest is presumably the message
     matching itself (confirm against the caller's handling of the list).

     Returns the apology string when that score is zero, otherwise the
     sentence at the matching position.
     """
     ChatResponse.sent_token.append(user_response)

     vectorizer = tfvec(tokenizer=lemitizer.LemNormalize,
                        stop_words='english')
     tfidf_matrix = vectorizer.fit_transform(ChatResponse.sent_token)

     scores = cosim(tfidf_matrix[-1], tfidf_matrix)
     # Position of the second-best match in the single row of scores.
     runner_up = scores.argsort()[0][-2]

     ranked = scores.flatten()
     ranked.sort()
     runner_up_score = ranked[-2]

     if runner_up_score == 0:
         return "I am sorry! I don't understand you"
     return ChatResponse.sent_token[runner_up]
def cosine_sim(x):
    """Score every entry of ``x["sent_emb"][0]`` against ``x["quest_emb"][0][0]``.

    Returns a list holding one ``cosim`` result per entry, in iteration order.
    """
    return [
        cosim(sentence, x["quest_emb"][0][0])
        for sentence in x["sent_emb"][0]
    ]
Example #6
0
from sklearn.metrics.pairwise import cosine_similarity as cosim

# Pairwise similarity between the boolean rows of compBool:
# 2 * |A and B| / (|A| + |B|), taken to be 1 when both rows are all-False.
n_comp = np.size(compMat, 0)
compSim = np.zeros([n_comp, n_comp])
for i in range(n_comp):
    print(i)
    # Row i does not change during the inner loop, so convert and count it once.
    # The builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    a1 = np.asarray(compBool[i, :]).astype(bool)
    a1_count = a1.sum()
    for j in range(n_comp):
        a2 = np.asarray(compBool[j, :]).astype(bool)
        pair_count = a1_count + a2.sum()
        if pair_count == 0:
            compSim[i, j] = 1
        else:
            intersection = np.logical_and(a1, a2)
            compSim[i, j] = 2. * intersection.sum() / pair_count
    # NOTE(review): always reads row 1, whatever i is -- looks like leftover
    # debug output; confirm before relying on it.
    print(compSim[1, i])
# Similarity between the rows of kinaseFeat, and the size of that matrix.
kinaseSim = cosim(kinaseFeat)
nt = len(kinaseSim)
#%% Clustering
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import pdist, squareform

# Pairwise Hamming distances computed from compMat.
d = pdist(compMat, metric='hamming')

# Hierarchical clustering of those distances with complete linkage.
z = linkage(d, method='complete')

# Draw the resulting tree on a fresh figure.
plt.figure()
dn = dendrogram(z)
Example #7
0
import embed_functions as emb
from glob import glob

def cosim_search(e, q=None, qw=None):
    """Cosine similarity among the rows of ``e``, or between ``e`` and a query.

    Parameters
    ----------
    e : embedding table. When a query is supplied it must expose ``.values``
        (e.g. a pandas DataFrame); without a query it is handed to ``cosim``
        unchanged.
    q : query vector, indexed with a list of positions and then reshaped to a
        single row -- presumably a 1-D NumPy array (confirm against callers).
    qw : iterable of query weights; positions whose weight is non-zero select
        the columns that take part in the comparison.

    Returns
    -------
    ``cosim(e, e)`` when ``q`` or ``qw`` is None; otherwise the flattened
    result of ``cosim`` between the reduced query and the reduced rows of ``e``.
    """
    if q is None or qw is None:
        return cosim(e, e)

    # Keep only the feature positions that carry a non-zero query weight.
    idx = [i for i, v in enumerate(qw) if v != 0]

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # on both old and current pandas versions.
    e_idx = e.values[:, idx]
    q_idx = q[idx].reshape(1, -1)
    return cosim(q_idx, e_idx).flatten()

# Directory holding the embedding outputs, and the parameter / file-suffix
# combinations to process.
path_work = '/mnt/HA/groups/rosenGrp/embed_cons/out/kegg'
params = ['6_256_5_50_10_1e-06_100','10_256_5_50_10_1e-06_100']
suffix = ['remb.csv.gz','remb_raw.csv.gz']

for p in params:
    # Every gzipped CSV in the work directory whose name contains this parameter string.
    path_emb = glob(path_work + '/*' + p + '*.csv.gz')

    for suf in suffix:
        mods = [path for path in path_emb if suf in path]
        # Order the pair so the file whose path contains 'vcons' comes first.
        # NOTE(review): this assumes exactly two matching files; fewer raises
        # IndexError and extras are ignored.
        if 'vcons' not in mods[0]:
            mods = [mods[1],mods[0]]
        remb_cons = pd.read_csv(mods[0],index_col=0)
        remb_clust = pd.read_csv(mods[1],index_col=0)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
        # on both old and current pandas versions.
        sim = pd.DataFrame(cosim(remb_cons.values,remb_clust.values),
                index=remb_cons.index,columns=remb_clust.index)
        # NOTE(review): the output name hard-codes '_1e-05_' while the entries
        # in params contain '1e-06' -- confirm this is intentional.
        sim.to_csv(os.path.join(path_work,'cosim_cons_' + p + '_1e-05_' + suf))