예제 #1
0
def get_graph():
    """Return the sub-graph (DAG) for the keyword ids given in the URL.

    The ids are read from the ``wids`` query parameter as a ``;``-separated
    list, the same convention the search endpoint uses for its keywords.

    Design notes carried over from the original author:
      * The DAG could probably be pre-computed and kept in memory when the
        app loads, because child nodes need to be ordered by relevance.
      * The intended architecture is that the backend serves the whole
        graph and the frontend prunes it. For a very large graph that no
        longer holds: the backend would have to prune, since the frontend
        cannot be expected to have the computing power.

    Returns:
        The JSON form of a ``Response``: ``PARAMETERS_ERR`` when ``wids`` is
        missing or empty, ``SUCCESS`` carrying a ``graph`` attribute when
        the graph was built, ``INTERNAL_ERR`` when building it raised.
    """
    # More than one id may be queried at the same time.
    wids = request.args.get('wids', '').lower()

    print('graph params', wids)

    if not wids:
        return Response(ResponseType.PARAMETERS_ERR).get_json()

    try:
        # Concept-lattice property: a child node's keyword set always
        # contains its parent's keyword set, and the opposite holds for the
        # document sets. So: find every node containing the extent, drop the
        # unrelated edges, and the node with fah == [] is the new root.
        wids = convert_int(wids.split(';'))
        g = sub_graph(NODES, wids)
        # Attach keyword data to the graph.
        g = add_info(g, KEYWORDS_DICT)
        # A single-element graph was once meant to be reported as a
        # 'No Such sub graph' failure; that was disabled, so every graph is
        # returned as a plain success.
        response = Response(ResponseType.SUCCESS)
        response.update_attr('graph', g)
    except Exception as e:
        print(e)
        response = Response(ResponseType.INTERNAL_ERR)
    return response.get_json()
예제 #2
0
def get_courses_list():
    """Return one page of the courses attached to a graph node.

    The graph endpoint and the course endpoint are kept separate because
    courses need pagination; they are queried straight from MongoDB since no
    relevance ranking is required. The orderings that were planned are year,
    default (lexicographic) and citation count, but the ``order`` parameter
    is currently disabled, so no explicit sort is applied.

    Returns:
        The JSON form of a ``Response``: ``SUCCESS`` with ``courses``,
        ``page``, ``pageSize`` and ``totalSize`` (the length of the node's
        ``extent``); ``PARAMETERS_ERR`` when validation fails; ``FAILURE``
        with the error text on a ``ValueError`` (e.g. unknown node id);
        ``INTERNAL_ERR`` on any other exception.
    """
    try:
        params = CourseListParams(request.args)
        # Reject the request early when the query parameters are invalid.
        if not params.validate():
            raise ValidationError

        node_id = params.node_id.data
        page = params.page.data
        page_size = params.page_size.data

        node = DB['nodes'].find_one({'nid': node_id})
        if not node:
            raise ValueError('no such node id exists')

        # The node's extent lists the ids of the courses it covers.
        extents = node['extent']
        cursor = DB['courses'].find({'cid': {"$in": extents}})
        cursor = cursor.limit(page_size)
        cursor = cursor.skip((page - 1) * page_size)

        # Strip the Mongo-internal '_id' from every document.
        courses = []
        for doc in cursor:
            courses.append(after_pop(doc, '_id'))

        response = Response(ResponseType.SUCCESS)
        payload = (
            ('courses', courses),
            ('page', page),
            ('pageSize', page_size),
            ('totalSize', len(extents)),
        )
        for attr_name, attr_value in payload:
            response.update_attr(attr_name, attr_value)
    except ValidationError as ve:
        print(ve)
        response = Response(ResponseType.PARAMETERS_ERR)
    except ValueError as ve:
        response = Response.get_custom_response(ResponseType.FAILURE, str(ve))
    except Exception as e:
        print(e)
        response = Response(ResponseType.INTERNAL_ERR)

    return response.get_json()
예제 #3
0
def user_rating():
    """Return the ratings and evaluations stored for the authenticated user.

    Endpoint: /api/user-rating

    The user is identified by the token in the ``Authorization`` header.

    Returns:
        The JSON form of a ``Response``: ``SUCCESS`` with ``rates`` and
        ``evaluation``, each a mapping of ``str(cid)`` to ``rate``;
        ``FAILURE`` with a message when the token is invalid or the user
        does not exist; ``INTERNAL_ERR`` on any other exception.
    """

    try:
        token = request.headers.get('Authorization', '')

        # By convention the header looks like "Bearer <token string>", so
        # the first 7 characters are skipped.
        uname = verify_auth_token(token[7:])
        if not uname:
            raise ValueError('Invalid Token')

        user = DB['users'].find_one({'username': uname})
        # find_one yields nothing for an unknown username; report that
        # explicitly instead of failing later on `user.get`.
        if not user:
            raise ValueError('User Not Found')

        # A missing or empty field is treated as an empty list.
        user_rates = user.get('rates') or []
        user_eval = user.get('evaluation') or []

        response = Response(ResponseType.SUCCESS)
        response.update_attr(
            'rates', {str(rate['cid']): rate['rate']
                      for rate in user_rates})
        response.update_attr(
            'evaluation',
            {str(rate['cid']): rate['rate']
             for rate in user_eval})

    except ValueError as ve:
        response = Response.get_custom_response(ResponseType.FAILURE, str(ve))
    except Exception as e:
        print(e)
        response = Response(ResponseType.INTERNAL_ERR)

    return response.get_json()
예제 #4
0
def rating():
    """Store a rating (or an evaluation) of a course for the current user.

    Endpoint: /api/rating?course_id=<int>&rate=<int>&is_evaluation=<bool>

    The user is identified by the token in the ``Authorization`` header.
    If the user already has an entry for the course, its ``rate`` is
    overwritten; otherwise a new ``{'cid', 'rate'}`` entry is pushed.

    Returns:
        The JSON form of a ``Response``: ``SUCCESS`` after the update calls
        complete, ``VALIDATION_ERR`` for invalid parameters, ``FAILURE``
        with a message on a ``ValueError`` (e.g. invalid token),
        ``INTERNAL_ERR`` on any other exception.
    """

    try:
        params = RatingParams(request.args)
        if not params.validate():
            raise ValidationError
        rate = params.rate.data
        course_id = params.course_id.data
        as_evaluation = params.is_evaluation.data
        token = request.headers.get('Authorization', '')

        # By convention the header looks like "Bearer <token string>".
        uname = verify_auth_token(token[7:])
        if not uname:
            raise ValueError('Invalid Token')

        # Both kinds of score are stored the same way; only the field names
        # on the user document differ.
        if as_evaluation:
            list_field = 'evaluation'
            cid_path = 'evaluation.cid'
            rate_path = 'evaluation.$.rate'
        else:
            list_field = 'rates'
            cid_path = 'rates.cid'
            rate_path = 'rates.$.rate'

        users = DB['users']
        entry_filter = {'username': uname, cid_path: course_id}

        if users.find_one(entry_filter):
            # An entry for this course exists: overwrite its rate in place.
            users.update(entry_filter, {'$set': {rate_path: rate}})
        else:
            # First score for this course: append a new entry.
            new_entry = {'cid': course_id, 'rate': rate}
            users.update({'username': uname},
                         {'$push': {list_field: new_entry}})

        response = Response(ResponseType.SUCCESS)

    except ValidationError:
        response = Response(ResponseType.VALIDATION_ERR)
    except ValueError as ve:
        response = Response.get_custom_response(ResponseType.FAILURE, str(ve))
    except Exception as e:
        print(e)
        response = Response(ResponseType.INTERNAL_ERR)

    return response.get_json()
예제 #5
0
def recommend_courses():
    """Recommend courses to the authenticated user, one page at a time.

    Endpoint: /api/recommend?page=<int>&page_size=<int>

    A ``mode`` parameter is also read; its string form selects the entry of
    ``COURSE_VECTORS_DICT`` handed to ``recommend``. The computed user model
    is stored on the user document under ``usermodel``.

    Returns:
        The JSON form of a ``Response``: ``SUCCESS`` with ``courses``,
        ``page``, ``pageSize`` and ``totalSize``; ``VALIDATION_ERR`` for
        invalid parameters; ``FAILURE`` with a message when the token is
        invalid, the user is unknown, or the user has no ratings to
        recommend from; ``INTERNAL_ERR`` on any other exception.
    """

    try:
        recommend_params = RecommendCoursesParams(request.args)
        if not recommend_params.validate():
            raise ValidationError

        page = recommend_params.page.data
        page_size = recommend_params.page_size.data
        mode = recommend_params.mode.data
        token = request.headers.get('Authorization', '')

        # Auth validation; the header looks like "Bearer <token string>".
        uname = verify_auth_token(token[7:])
        if not uname:
            raise ValueError('Invalid Token')

        # Find the user.
        user = DB['users'].find_one({'username': uname})
        if not user:
            raise ValueError('User Not Found')

        # Without any rating there is nothing to build a user model from.
        user_rates = user.get('rates', None)
        if not user_rates:
            return Response.get_custom_response(
                ResponseType.FAILURE, "unable to recommend").get_json()

        course_vectors = COURSE_VECTORS_DICT[str(mode)]
        user_model = recommend(user_rates, course_vectors)

        # Store the user model.
        DB['users'].update({'username': uname},
                           {'$set': {
                               'usermodel': user_model
                           }})

        user_courses = [x['cid'] for x in user_model]

        # Move the courses the user rated 5 to the front, then those rated
        # 4, then everything else. sorted() is stable, so the model's order
        # is kept inside each group, and no course can fall out of the list
        # whatever its stored rate is.
        user_rates_dict = {x['cid']: x['rate'] for x in user_rates}

        def _rating_bucket(cid):
            rate = user_rates_dict.get(cid, 0)
            if rate == 5:
                return 0
            if rate == 4:
                return 1
            return 2

        user_courses = sorted(user_courses, key=_rating_bucket)

        # Load only the courses of the requested page.
        courses = [
            after_pop(DB['courses'].find_one({'cid': a}), '_id')
            for a in user_courses[(page - 1) * page_size:page * page_size]
        ]

        response = Response(ResponseType.SUCCESS)
        response.update_attr('courses', courses)
        response.update_attr('page', page)
        response.update_attr('pageSize', page_size)
        response.update_attr('totalSize', len(user_courses))

    except ValidationError:
        response = Response(ResponseType.VALIDATION_ERR)
    except ValueError as ve:
        response = Response.get_custom_response(ResponseType.FAILURE, str(ve))
    except Exception as e:
        print('unknown err', e)
        response = Response(ResponseType.INTERNAL_ERR)
    return response.get_json()
예제 #6
0
def search():
    """Look up keywords and suggest similar ones for those that miss.

    Keywords come from the URL, e.g. ``/api/search?keywords=xxx;xxx``. The
    parameter is a single string with ``;`` as separator, because one
    keyword may itself be a phrase.

    Each keyword is matched exactly against the ``keywords`` collection.
    When at least one keyword misses, every known keyword is scored by its
    best Levenshtein ratio against the missed keywords and the top 5 are
    returned as recommendations.

    Original author's notes: similarity search could use a Levenshtein
    automaton or a BK-tree instead; that would be needed for real-time,
    as-you-type suggestions, but has not been implemented.

    Returns:
        The JSON form of a ``Response``: ``SUCCESS`` with ``keywords`` (the
        matched keywords), ``match`` (False as soon as one keyword missed),
        ``wids`` (ids of the matched keywords) and ``recommend`` (suggested
        keywords, empty when nothing missed); ``FAILURE`` on any exception.
    """

    keywords = request.args.get('keywords', '').lower()
    keywords = keywords.split(';')
    print(keywords)

    # The keyword is passed to MongoDB as a plain value, not parsed as a
    # query, so per the original author there is no injection risk here.

    mongo_col = DB['keywords']  # keywords collection
    fail_list = []
    matched_list = []
    match = True
    wids = []
    recommend = []
    try:
        for k in keywords:
            res = mongo_col.find_one({'keyword': k})
            if res:
                matched_list.append(res['keyword'])
                wids.append(res['wid'])
            else:
                match = False
                fail_list.append(k)

        # Compare the keywords that missed against all known keywords.
        match_num = 5
        if fail_list:

            def _best_ratio(kwobj):
                # Score a candidate by its highest similarity to any missed
                # keyword. The key must be the ratio itself: returning the
                # missed keyword would sort candidates by that string.
                return max(
                    Levenshtein.ratio(kwobj['keyword'], fail_kw)
                    for fail_kw in fail_list)

            best_matches = sorted(KEYWORDS, key=_best_ratio, reverse=True)
            recommend = [x['keyword'] for x in best_matches[:match_num]]

        response = Response(ResponseType.SUCCESS)
        response.update_attr('keywords', matched_list)
        response.update_attr('match', match)
        response.update_attr('wids', wids)
        response.update_attr('recommend', recommend)
    except Exception as e:
        print(e)
        response = Response(ResponseType.FAILURE)
    return response.get_json()


# @app.route('/api/root', methods=['GET'], strict_slashes=False)
# def get_root():
#         rootId = 1
#         response = Response(ResponseType.SUCCESS)
#         response.update_attr('rootId', rootId)
#     except Exception as e:
#         print(e)
#         response = Response(ResponseType.FAILURE)
#     return response.get_json()