Ejemplo n.º 1
0
def online():
    """Build and save the NLP-based estimated URM for the online run.

    Loads the dataset in 'online' mode, builds the UCM through ``NLP``
    (with an empty stop-word list), computes a Tversky similarity from it,
    multiplies that similarity with the shrinked URM (``k=500``), keeps the
    last 10000 rows of the result and writes them to
    ``ROOT_DIR/data/eurm_nlp_no_stop_online.npz``.

    The elapsed time of both the similarity and the eurm phase is printed.
    Returns nothing.
    """
    datareader = Datareader(mode='online', only_load=True)

    print('NLP...')
    # Stop words are not filtered here (empty list), matching the "no_stop"
    # tag in the output file name.
    nlp = NLP(datareader, stopwords=[])
    ucm = nlp.get_ucm()
    # Optional re-weighting steps, currently disabled:
    #   ucm = bm25_row(ucm)
    #   inplace_csr_column_scale(ucm, np.array(TOKEN_WEIGHTS))

    # get_urm_shrinked() returns several values; only the first is needed.
    urm = datareader.get_urm_shrinked()[0]

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    print('Computing eurm...')
    start = time.time()
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    # Report the timing of this phase as well (the timer used to be started
    # but never read).
    print(time.time() - start)
    print(eurm_nlp.shape)

    # Keep only the last 10000 rows.
    # NOTE(review): presumably these are the rows of the target playlists --
    # confirm against Datareader's row ordering.
    eurm_nlp = eurm_nlp[-10000:, :]

    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz', eurm_nlp)
Ejemplo n.º 2
0
    # Build a run identifier from the mode, the model name, the knn/topk
    # settings and the current timestamp (minute resolution).
    complete_name = mode+"_"+name+"_knn="+str(knn)+"_topk="+str(topk)\
                    + '_' + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")

    if mode=="offline":
        # Offline runs additionally record which test split was used.
        complete_name+="_test="+str(test_num)


        # NOTE(review): everything below is nested inside the offline branch,
        # so nothing in this fragment runs for any other mode -- confirm that
        # this indentation is intended.
        # Bot_v1 is presumably a notification/logging helper; here it is only
        # used to report errors.
        bot = Bot_v1(complete_name)

        try:
            # Load the data set in its shrinked form ("50k" train format).
            dr = Datareader(mode=mode, test_num=test_num, train_format="50k", only_load=True)
            # NOTE(review): `ev` and `urm_evaluation` are not read in the
            # visible lines; they may be used by code outside this fragment
            # (e.g. by evaluate_shrinked if these are module-level names).
            ev = Evaluator(dr)
            pids = dr.get_test_pids()

            urm, dictns, dict2 = dr.get_urm_shrinked()
            # Evaluation matrix restricted to the rows of the test pids.
            urm_evaluation = dr.get_evaluation_urm()[pids]

            # Translate the test pids through the first mapping returned by
            # get_urm_shrinked().
            pids_converted = np.array([dictns[x] for x in pids], dtype=np.int32)

            # The SLIM model is built on the transposed shrinked URM.
            slim = MultiThreadDSLIM_RMSE(urm.T)

            slim.fit(l1_penalty=l1, l2_penalty=l2, positive_only=True, beta=beta, topK=topk)

            evaluate_shrinked(W_sparse= slim.W_sparse, urm_shrinked= urm, pids_shrinked= pids_converted)

            # Persist the learned weight matrix under the run identifier.
            sps.save_npz(complete_name+".npz",slim.W_sparse,)

        except Exception as e:
            # Any failure is reported through the bot and not re-raised.
            bot.error("Exception "+str(e))
Ejemplo n.º 3
0
    # Default reader for the requested mode; both offline branches below
    # replace it with their own instance.
    dr = Datareader(mode=mode,
                    verbose=True,
                    train_format=train_format,
                    only_load=True)

    if mode == 'offline':

        # A non-empty train_format selects the shrinked data set; an empty
        # one falls back to the full data set. The length test must wrap
        # train_format only: comparing the format itself to 0 before calling
        # len() fails for a string format.
        if len(train_format) > 0:
            ####### DATA INITIALIZATION SHRINKED #################
            dr = Datareader(mode=mode,
                            train_format=train_format,
                            only_load=True)

            ev = Evaluator(dr)
            pids = dr.get_test_pids()
            urm, dict_n_to_s, dict2 = dr.get_urm_shrinked()
            # Evaluation matrix restricted to the rows of the test pids.
            urm_evaluation = dr.get_evaluation_urm()
            urm_evaluation = urm_evaluation[pids]
            # Translate the test pids through the first mapping returned by
            # get_urm_shrinked().
            pids_converted = np.array([dict_n_to_s[x] for x in pids],
                                      dtype=np.int32)

        else:
            ####### DATA INITIALIZATION FULL #################
            dr = Datareader(mode=mode, only_load=True, verbose=False)
            ev = Evaluator(dr)
            pids = dr.get_test_pids()

            urm = dr.get_urm()
            urm_evaluation = dr.get_evaluation_urm()
            urm_evaluation = urm_evaluation[pids]
            # NOTE(review): this discards the evaluation matrix computed on
            # the two lines above -- confirm that this is intended.
            urm_evaluation = None