コード例 #1
0
def test(request):
    """Render one page of the news items whose class is "病虫害".

    The requested page is taken from the ``p`` query parameter and handed to
    ``Pagination`` together with the total number of matching news items.

    Args:
        request: the incoming request; only ``request.GET`` is read here.

    Returns:
        The rendered ``test.html`` response with ``data`` (the news entries
        for the current page) and ``page_obj`` in the template context, or
        whatever ``index(request)`` returns if anything goes wrong.
    """
    print("class_1 ......")
    try:
        current_page = request.GET.get('p')
        rec_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'news')
        ans_list = [{
            "news": news,
            "news_id": str(news["_id"]),
        } for news in rec_db.find({"class_name": "病虫害"})]
        page_obj = Pagination(len(ans_list), current_page)
        data_list = ans_list[page_obj.start():page_obj.end()]
    except Exception:
        # Best-effort view: any lookup/pagination failure falls back to the
        # index page. ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt/SystemExit propagating.
        return index(request)
    return render(request,
                  ROOT_URL + "/recommend_templates/templates/test.html", {
                      'data': data_list,
                      'page_obj': page_obj
                  })
コード例 #2
0
ファイル: main.py プロジェクト: PGDIP/Agri
def tfidf2Txt():
    """Compute TF-IDF nearest neighbours for every news item and save them.

    Reads all documents from the ``news`` collection, vectorises their
    ``jieba_cut_content`` field with TF-IDF, finds each document's nearest
    neighbours in that space, and writes one CSV row per document
    (``_id``, ``title``, ``class_name``, ``k_nbrs``) to
    ``./recSys/data/key_bucket.csv``. ``k_nbrs`` is a space-separated list
    of row positions (indices into the document list), not database ids.
    """
    rec_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'news')
    articles = list(rec_db.find())
    key_bucket = [[article["_id"], article["title"], article["class_name"]]
                  for article in articles]
    content = [article["jieba_cut_content"] for article in articles]
    key_bucket_df = pd.DataFrame(key_bucket,
                                 columns=["_id", "title", "class_name"])

    # Term-count matrix: element [i][j] is the count of term j in document i.
    vectorizer = CountVectorizer()
    # Re-weights the term counts into TF-IDF values.
    transformer = TfidfTransformer()
    train_set_vector = vectorizer.fit_transform(content)
    tfidf = transformer.fit_transform(train_set_vector)
    # Dense (documents x vocabulary) array of TF-IDF weights.
    weight = tfidf.toarray()

    # kneighbors() rejects n_neighbors larger than the number of fitted
    # samples, so cap it for small collections.
    n_neighbors = min(10, len(content))
    nbrs = NearestNeighbors(n_neighbors=n_neighbors,
                            algorithm="ball_tree").fit(weight)
    # kneighbors returns (distances, indices); only the indices are stored.
    _, indices = nbrs.kneighbors(weight)
    k_nrbs_list = [" ".join("%d" % x for x in row) for row in indices]
    key_bucket_df["k_nbrs"] = pd.Series(k_nrbs_list)
    key_bucket_df.to_csv("./recSys/data/key_bucket.csv",
                         index=False,
                         index_label=False)
コード例 #3
0
def count_click_times(request):
    """Add a news id to the set of news a user has looked at.

    ``news_id`` and ``user_name`` are read from ``request.POST`` when it is
    non-empty, otherwise from ``request.GET``. The user's ``looked_list`` is
    treated as a set, so recording the same news id twice has no effect.

    Args:
        request: the incoming request carrying ``news_id`` and ``user_name``.

    Returns:
        ``None`` on success; whatever ``index(request)`` returns if the user
        lookup or the update fails.

    NOTE(review): the success path returns no response object — confirm the
    caller/framework tolerates that.
    NOTE(review): ``user_name`` comes from request parameters rather than the
    session, so a client can update another user's list — confirm intended.
    """
    params = request.POST if request.POST else request.GET
    news_id = params.get('news_id')
    user_name = params.get('user_name')
    try:
        rec_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'user')
        db_ans = rec_db.find({"user_name": user_name})[0]
        # A missing looked_list is treated the same as an empty one.
        looked_list = set(db_ans.get("looked_list") or [])
        looked_list.add(news_id)
        rec_db.update(
            {'user_name': user_name},
            {'$set': {
                "looked_list": list(looked_list)
            }},
        )
    except Exception:
        # Unknown user or database failure: fall back to the index page.
        return index(request)
コード例 #4
0
def userRegist(user_name, user_passwd):
    """Insert a new user document into the ``users`` collection.

    The stored document has the keys ``user_name``, ``user_passwd`` and an
    empty-string ``user_read_id``.

    NOTE(review): the password is stored exactly as received — confirm
    whether hashing happens before this call.
    """
    users_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'users')
    users_db.insert(
        dict(user_name=user_name, user_passwd=user_passwd, user_read_id=""),
        "users",
    )
コード例 #5
0
def userLogin(user_name, user_passwd):
    """Check a user name / password pair against the ``users`` collection.

    Args:
        user_name: the name to look up.
        user_passwd: the password that must match the stored one.

    Returns:
        ``"success"`` when at least one matching user document exists,
        otherwise ``None``.

    NOTE(review): the password is compared as stored — confirm whether
    hashing is applied before this call.
    """
    rec_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'users')
    matches = rec_db.find({
        "user_name": user_name,
        "user_passwd": user_passwd,
    })
    # find() hands back an iterable of documents, which is never None even
    # when nothing matched; the only reliable check is whether it yields a
    # document at all.
    if matches is None:
        return None
    first_match = next(iter(matches), None)
    if first_match is None:
        return None
    return "success"
コード例 #6
0
ファイル: main.py プロジェクト: PGDIP/Agri
def updateDB():
    """Run the news parsers and insert every parsed item into ``news``.

    Uses the AgriRecSys database with ``news`` as the target collection.
    All parsers are executed first; their results are inserted afterwards.
    paserClass1() is currently disabled.
    """
    news_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'news')
    parsed_batches = (
        paserClass2(),
        paserClass3(),
        paserClass4(),
        paserClass5(),
    )
    for batch in parsed_batches:
        if len(batch) > 0:
            for record in batch.values():
                # Insert into the news collection.
                news_db.insert(record, "news")
コード例 #7
0
def history(request):
    """Render the news items the current session's user has looked at.

    The user's ``looked_list`` (news ids) is resolved against the ``news``
    collection; ids with no matching news document are skipped. A user with
    no ``looked_list`` gets an empty history.

    Args:
        request: the incoming request; ``user_name`` is read from its session.

    Returns:
        The rendered ``history.html`` response with ``user_name`` and
        ``news_list`` in the context, or whatever ``index(request)`` returns
        if the user cannot be found or a lookup fails.
    """
    data = {}
    user_name = request.session.get('user_name')
    try:
        user_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'user')
        user_doc = user_db.find({"user_name": user_name})[0]
        looked_ids = user_doc.get("looked_list") or []
        # One connection for all news lookups instead of one per id.
        news_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'news')
        ans_list = []
        for i, news_id in enumerate(looked_ids):
            # Take the first match by iterating, which does not depend on the
            # cursor's count() method.
            news = next(iter(news_db.find({'_id': ObjectId(news_id)})), None)
            if news is None:
                continue
            ans_list.append({
                "news": news,
                "news_id": str(news["_id"]),
                "href": "#href_id%d" % (i),
                "content_id": "href_id%d" % (i),
                "click_id": "ajax_id_%d" % (i),
                "ajax_id": "#ajax_id_%d" % (i),
            })
        data["user_name"] = user_name
        data["news_list"] = ans_list
    except Exception:
        # Unknown user, malformed id or database failure: show the index page.
        return index(request)
    # The template path must be exact.
    return render(request,
                  ROOT_URL + "/recommend_templates/templates/history.html",
                  data)
コード例 #8
0
def myRecommend(request, data=None):
    """Render news recommended from the current user's viewing history.

    The user's ``looked_list`` is passed to ``get_K_nearst_love`` (asking for
    8 results) and the returned ids are resolved against the ``news``
    collection; ids with no matching news document are skipped.

    Args:
        request: the incoming request; ``user_name`` is read from its session.
        data: optional template context to extend. When given, the
            ``user_name`` and ``news_list`` keys are set on it in place; when
            omitted, a fresh dict is used for each call.

    Returns:
        The rendered ``myRecommend.html`` response, or whatever
        ``index(request)`` returns when the user has no viewing history yet
        (cold start), cannot be found, or a lookup fails.
    """
    if data is None:
        # A fresh dict per call: a shared default would leak one request's
        # context into the next.
        data = {}
    user_name = request.session.get('user_name')
    ans_list = None
    try:
        user_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'user')
        user_doc = user_db.find({"user_name": user_name})[0]
        looked_ids = user_doc.get("looked_list")
        if looked_ids is not None:
            rec_new_id_list = get_K_nearst_love(8, looked_ids)
            # One connection for all news lookups instead of one per id.
            news_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'news')
            ans_list = []
            for i, news_id in enumerate(rec_new_id_list):
                # Take the first match by iterating, which does not depend on
                # the cursor's count() method.
                news = next(
                    iter(news_db.find({"_id": ObjectId(news_id)})), None)
                if news is None:
                    continue
                ans_list.append({
                    "news": news,
                    "news_id": str(news["_id"]),
                    "href": "#href_id%d" % (i),
                    "content_id": "href_id%d" % (i),
                    "click_id": "ajax_id_%d" % (i),
                    "ajax_id": "#ajax_id_%d" % (i),
                })
    except Exception:
        # Unknown user, malformed id or database failure: show the index page.
        return index(request)
    if ans_list is None:
        # Cold start: nothing to base a recommendation on yet.
        return index(request)
    data["user_name"] = user_name
    data["news_list"] = ans_list
    # The template path must be exact.
    return render(request,
                  ROOT_URL + "/recommend_templates/templates/myRecommend.html",
                  data)
コード例 #9
0
def class_4(request, data=None):
    """Render the news items whose class is "市场价格".

    Args:
        request: the incoming request; ``user_name`` is read from its session.
        data: accepted for backward compatibility but not used; the template
            context is always built from scratch.

    Returns:
        The rendered ``class_4.html`` response with ``news_list`` and
        ``user_name`` in the context, or whatever ``index(request)`` returns
        if anything goes wrong.
    """
    try:
        user_name = request.session.get('user_name')
        rec_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'news')
        ans_list = [{
            "news": news,
            "news_id": str(news["_id"]),
            "href": "#href_id%d" % (i),
            "content_id": "href_id%d" % (i),
            "click_id": "ajax_id_%d" % (i),
            "ajax_id": "#ajax_id_%d" % (i),
        } for i, news in enumerate(rec_db.find({"class_name": "市场价格"}))]
        context = {"news_list": ans_list, "user_name": user_name}
    except Exception:
        # Best-effort view: any failure falls back to the index page, while
        # KeyboardInterrupt/SystemExit still propagate.
        return index(request)
    # The template path must be exact.
    return render(request,
                  ROOT_URL + "/recommend_templates/templates/class_4.html",
                  context)
コード例 #10
0
ファイル: main.py プロジェクト: PGDIP/Agri
    weight = tfidf.toarray()  # 将tf-idf矩阵抽取出来,元素weight[i][j]表示j词在i类文本中的tf-idf权重
    #print "整个样本集合中(样本个数*词库大小) = ",weight.shape
    nbrs = NearestNeighbors(n_neighbors=10, algorithm="ball_tree").fit(weight)
    #返回距离每个点k个最近的点和距离指数,indices可以理解为表示点的下标,distances为距离
    distances, indices = nbrs.kneighbors(weight)
    k_nrbs_list = []
    for i in range(len(indices)):
        k_nrbs_list.append(" ".join(["%d" % x for x in indices[i]]))
    key_bucket_df["k_nbrs"] = pd.Series(k_nrbs_list)
    key_bucket_df.to_csv("./recSys/data/key_bucket.csv",
                         index=False,
                         index_label=False)


# Script entry point: reset the news collection, crawl one source, then
# rebuild the TF-IDF / nearest-neighbour CSV. The steps run in this order.
if __name__ == '__main__':
    rec_db = MongoOperator('localhost', 27017, 'AgriRecSys', 'news')
    rec_db.remove("news")  # clear the database (news)
    # rec_db.remove("user") # clear the database (user)
    # updateDB()  # crawler module entry point

    from recommend_templates.Main.paserManager.util import CorrectIp
    ci = CorrectIp()
    ci.getCorrectIp()  # proxy IP

    gzb = GZB(ROOT_PATH)  # ......... Gengzhongbang (耕种帮) .........
    gzb.get_url_from_each_page()

    # zgny = ZGNYKJ(ROOT_PATH)  # ......... China Agricultural Science and Technology (中国农业科技) .........
    # zgny.main()
    #
    tfidf2Txt()  # entry point of the TF-IDF and KNN step