def preprocess():
    """Flask endpoint: preprocess the requirements supplied in the request JSON.

    Expects a JSON body containing a non-empty ``requirements`` array and a
    ``stemmer`` query parameter that must be the literal string ``'true'`` or
    ``'false'``. Aborts with HTTP 400 on any invalid input; otherwise returns
    ``({'requirements': [...]}, 200)`` with the preprocessed requirements
    encoded via ``encoder``.
    """
    if not request.json or 'requirements' not in request.json:
        abort(
            400,
            'The input json is empty or it does not contain a requirements array'
        )
    stemmer = request.args.get('stemmer', '')
    if stemmer == "":
        abort(400, 'The stemmer parameter is missing')
    if stemmer not in ('true', 'false'):
        abort(400, 'The stemmer parameter is not correct')
    requirements = []
    for json_req in request.json['requirements']:
        if 'id' not in json_req:
            abort(400, 'There is a requirement without id')
        # Missing keys and explicit JSON nulls both fall back to ''.
        req_id = json_req['id']  # renamed: 'id' shadows the builtin
        title = json_req['title'] if json_req.get('title') is not None else ''
        description = (json_req['description']
                       if json_req.get('description') is not None else '')
        requirements.append(Requirement(req_id, title, description, ''))
    if not requirements:
        abort(400, 'The input requirements array is empty')
    # The two original call sites differed only in the boolean flag.
    preprocessed_requirements = preprocessing.preprocess_requirements(
        requirements, stemmer == 'true')
    result = {
        'requirements': [encoder(r) for r in preprocessed_requirements]
    }
    return result, 200
# Exemple #2 (paste-site artifact kept as a comment so the file parses)
def compute_popularity(body):  # noqa: E501
    """Retrieve a list with values for given set of requirements indicating their popularity for the crowd on twitter.

     # noqa: E501

    :param body: Requirement objects for which the social popularity should be measured
    :type body: list | bytes

    :rtype: List[RequirementPopularity]
    """
    response_list = []
    if connexion.request.is_json:
        content = connexion.request.get_json()
        assert isinstance(content, list)
        requirements = [Requirement.from_dict(d)
                        for d in content]  # noqa: E501
        requirements = [
            requirement.Requirement(r.id, r.title, r.description)
            for r in requirements
        ]
        requirements = preprocessing.preprocess_requirements(
            requirements,
            enable_pos_tagging=True,
            enable_lemmatization=False,
            enable_stemming=False)
        # Extend stop word list: https://www.wordfrequency.info/free.asp?s=y

        # POS classes considered relevant for the Twitter lookup: nouns
        # (NN/NNS), named entities (NE) and foreign words (FW).
        relevant_pos_classes = ("NN", "NNS", "NE", "FW")
        maut_results = []
        for requ in requirements:
            maut_temp = 0
            if len(list(requ.title_tokens_pos_tags)) > 0:
                for tag in set(requ.title_tokens_pos_tags +
                               requ.description_tokens_pos_tags):
                    # NOTE(review): a tag can be counted once per matching
                    # POS class; presumably tags are (token, pos) pairs so at
                    # most one class matches — confirm against the tagger.
                    for pos_class in relevant_pos_classes:
                        if pos_class in tag:
                            maut_temp += fetch_twitter(str(tag[0]))
            else:
                # No POS tags available: fall back to the raw tokens.
                for token in set(requ.title_tokens + requ.description_tokens):
                    maut_temp += fetch_twitter(token)
            maut_results.append(maut_temp)

        # Hoisted: the original recomputed sum(maut_results) twice per
        # requirement inside the loop (accidental O(n^2)).
        maut_total = sum(maut_results)
        for idx, requ in enumerate(requirements):
            response_list.append(
                RequirementPopularity(
                    id=requ.id,
                    # Normalized share of the total MAUT score, in percent;
                    # 0 when no tweets matched at all.
                    popularity=((maut_results[idx] * 100) / maut_total
                                if maut_total > 0 else 0)))

    return response_list
def perform_svd():
    """Experiment helper: load Siemens requirements from CSV, preprocess them
    and print SVD-based similarity predictions to stdout.

    Note: the original also loaded ``requirements_en.json`` first, but that
    result (and its ``enable_tagging``/``max_distance`` settings) was
    immediately overwritten by the CSV branch — the dead load was removed.
    """
    enable_tagging = False
    max_distance = 0.4
    with open(
            os.path.join(helper.APP_PATH, "data",
                         "siemens_requirements_en.csv")) as f:
        plain_requirements = csv_reader(f)
        requs = []
        # Cap the experiment at the first ~400 requirements.
        for idx, description in enumerate(plain_requirements):
            if idx > 400:
                break
            requs.append({'id': idx, 'title': '', 'description': description})

    requs = [Requirement.from_dict(r) for r in requs]
    lang = "en"

    requs = [
        requirement.Requirement(r.id, r.title, r.description) for r in requs
    ]
    requs = preprocessing.preprocess_requirements(
        requs,
        enable_pos_tagging=enable_tagging,
        enable_lemmatization=enable_tagging,
        enable_stemming=False,
        lang=lang)

    _logger.info("SVD...")
    predictions_map = svd.svd(requs, k=3, max_distance=max_distance)
    for subject_requirement, similar_requirements in predictions_map.items():
        if not similar_requirements:
            continue

        for similar_requirement in similar_requirements:
            print("#{}: {} -> #{}: {}".format(
                subject_requirement.id, subject_requirement.description[:80],
                similar_requirement.id, similar_requirement.description[:80]))
# Exemple #4 (paste-site artifact kept as a comment so the file parses)
def recommend_requirement_dependencies(body):  # noqa: E501
    """Recommend likely dependencies between the given requirements.

    Uses SVD-based similarity over the preprocessed requirement texts
    (including comments). Each returned requirement carries the ids of its
    predicted dependencies in its ``predictions`` attribute; every undirected
    dependency pair is reported only once, on the first requirement it is
    seen for.

     # noqa: E501

    :param body: Requirement objects for which dependencies should be recommended
    :type body: list | bytes

    :rtype: List[Requirement]
    """

    response_list = []
    # TODO: introduce parameter to set language
    lang = "en"

    if connexion.request.is_json:
        content = connexion.request.get_json()
        assert isinstance(content, list)
        requs = [Requirement.from_dict(d) for d in content]  # noqa: E501

        requs = [
            requirement.Requirement(r.id, r.title, r.description, r.comments)
            for r in requs
        ]
        for r in requs:
            r.append_comments_to_description()

        requs = preprocessing.preprocess_requirements(requs,
                                                      enable_stemming=False,
                                                      lang=lang)

        # Drop requirements that are empty after preprocessing.
        requs = [r for r in requs if len(r.tokens()) > 0]

        if not requs:
            return []

        _logger.info("SVD...")

        # Heuristic SVD parameters: larger corpora get a higher rank k and a
        # narrower distance window.
        if len(requs) > 100:
            min_distance, max_distance = 0.2, 0.5
            k = 10
        elif len(requs) > 50:
            min_distance, max_distance = 0.2, 0.6
            k = 8
        elif len(requs) > 30:
            min_distance, max_distance = 0.2, 0.65
            k = 5
        elif len(requs) > 10:
            min_distance, max_distance = 0.2, 0.7
            k = 3
        elif len(requs) > 5:
            min_distance, max_distance = 0.2, 0.75
            k = 2
        else:
            min_distance, max_distance = 0.2, 0.8
            k = 1

        predictions_map = svd.svd(requs, k=k, min_distance=min_distance,
                                  max_distance=max_distance)
        dependency_pairs = set()
        for subject_requirement, dependent_requirements in predictions_map.items():
            requ = Requirement.from_dict({
                "id": subject_requirement.id,
                "title": subject_requirement.title,
                "description": subject_requirement.description,
                "comments": subject_requirement.comments
            })
            rx = subject_requirement.id
            dependent_ids = set(r.id for r in dependent_requirements)
            # Record both orientations of each pair so that reporting (rx, ry)
            # here suppresses the mirror report (ry, rx) for a later subject.
            undirected_pairs = set(itertools.chain.from_iterable(
                ((rx, ry), (ry, rx)) for ry in dependent_ids))
            new_pairs = undirected_pairs - dependency_pairs
            dependency_pairs |= new_pairs
            # The "other" id of each not-yet-reported pair.
            predictions = list(set(
                t[0] if t[0] != rx else t[1] for t in new_pairs))

            requ.predictions = predictions
            response_list.append(requ)
            for dependent_requirement in dependent_requirements:
                # Was a bare print(); use the module logger like the rest of
                # this file does.
                _logger.debug("%s -> %s", subject_requirement,
                              dependent_requirement)

    return response_list
def recommend_similar_requirements(body):  # noqa: E501
    """Recommend similar requirements for each of the given requirements.

    Uses SVD-based similarity over the preprocessed requirement texts
    (including comments). Each returned requirement carries the ids of its
    similar requirements — as a symmetric relation — in its ``predictions``
    attribute.

     # noqa: E501

    :param body: Requirement objects for which similar requirements should be recommended
    :type body: list | bytes

    :rtype: List[Requirement]
    """

    response_list = []
    # TODO: introduce parameter to set language
    lang = "en"

    if connexion.request.is_json:
        content = connexion.request.get_json()
        assert isinstance(content, list)
        requs = [Requirement.from_dict(d) for d in content]  # noqa: E501

        requs = [
            requirement.Requirement(r.id, r.title, r.description, r.comments)
            for r in requs
        ]
        for r in requs:
            r.append_comments_to_description()

        requs = preprocessing.preprocess_requirements(requs, lang=lang)

        # Drop requirements that are empty after preprocessing.
        requs = [r for r in requs if len(r.tokens()) > 0]

        if not requs:
            return []

        _logger.info("SVD...")

        # Heuristic SVD parameters: larger corpora get a higher rank k and a
        # stricter distance threshold.
        if len(requs) > 100:
            max_distance = 0.4
            k = 10
        elif len(requs) > 50:
            max_distance = 0.5
            k = 8
        elif len(requs) > 30:
            max_distance = 0.55
            k = 5
        elif len(requs) > 10:
            max_distance = 0.6
            k = 3
        elif len(requs) > 5:
            max_distance = 0.6
            k = 2
        else:
            max_distance = 0.6
            k = 1

        predictions_map = svd.svd(requs, k=k, max_distance=max_distance)

        # Build the symmetric closure: if rx is predicted similar to ry,
        # record the relation on both rx and ry.
        predictions = {}
        for subject_requirement, similar_requirements in predictions_map.items():
            rx = subject_requirement.id
            rx_predictions = set(r.id for r in similar_requirements)
            predictions.setdefault(rx, set()).update(rx_predictions)
            for ry in rx_predictions:
                predictions.setdefault(ry, set()).add(rx)

        for subject_requirement, similar_requirements in predictions_map.items():
            requ = Requirement.from_dict({
                "id": subject_requirement.id,
                "title": subject_requirement.title,
                "description": subject_requirement.description,
                "comments": subject_requirement.comments
            })
            requ.predictions = list(predictions[subject_requirement.id])
            response_list.append(requ)
            for similar_requirement in similar_requirements:
                # Was a bare print(); use the module logger like the rest of
                # this file does.
                _logger.debug("%s -> %s", subject_requirement,
                              similar_requirement)

    return response_list