Example #1
def compute_textual_indices(text):
    # Build a German document, attach a CNA graph, and compute all complexity indices.
    lang = str_to_lang("de")
    model = get_default_model(lang)
    # Alternative: the word2vec model trained on the German Wikipedia corpus.
    # model = VECTOR_MODELS[lang][CorporaEnum.WIKI][VectorModelType.WORD2VEC](
    #     name=CorporaEnum.WIKI.value, lang=Lang.DE)

    doc = Document(lang, text)
    cna_graph = CnaGraph(docs=doc, models=[model])
    compute_indices(doc=doc, cna_graph=cna_graph)

    # Collect the raw text of each paragraph (block) and its sentences.
    blocks_text = []
    for b in doc.get_blocks():
        sentences_text = [s.text for s in b.get_sentences()]
        blocks_text.append({'text': b.text, 'sentences': sentences_text})

    feedback_text = {'doc': doc.text, 'blocks': blocks_text}

    # Per-sentence and per-block complexity indices.
    sentences = [sent.indices for sent in doc.get_sentences()]
    blocks = [b.indices for b in doc.get_blocks()]

    return {
        'text': feedback_text,
        'indices': {
            'document': doc.indices,
            'sentence': sentences,
            'block': blocks
        }
    }
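A minimal usage sketch, assuming the surrounding module's ReaderBench imports (str_to_lang, get_default_model, Document, CnaGraph, compute_indices) are in scope and the default German model is available locally; the sample text is illustrative:
sample = "Das ist ein kurzer Beispieltext. Er besteht aus zwei Saetzen."
result = compute_textual_indices(sample)
print(result['text']['doc'])                    # the original text
print(list(result['indices']['document'])[:5])  # first few complexity index objects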
Example #2
def keywordsPost():
    """TODO, not working"""
    params = json.loads(request.get_data())
    posTagging = params.get('pos-tagging')
    bigrams = params.get('bigrams')
    text = params.get('text')
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    threshold = params.get('threshold')
    plotName = "wordnet"
    #plotName = params.get('saveAs')

    # if lang is Lang.RO:
    #     vector_model = VECTOR_MODELS[lang][CorporaEnum.README][VectorModelType.WORD2VEC](
    #         name=CorporaEnum.README.value, lang=lang)
    # elif lang is Lang.EN:
    #     vector_model = VECTOR_MODELS[lang][CorporaEnum.COCA][VectorModelType.WORD2VEC](
    #         name=CorporaEnum.COCA.value, lang=lang)
    # elif lang is Lang.ES:
    #     vector_model = VECTOR_MODELS[lang][CorporaEnum.JOSE_ANTONIO][VectorModelType.WORD2VEC](
    #         name=CorporaEnum.JOSE_ANTONIO.value, lang=lang)

    # lsa = params.get('lsa')
    # lda = params.get('lda')
    # w2v = params.get('w2v')
    # threshold = params.get('threshold')

    # textElement = Document(lang=lang, text=text, vector_model=vector_model)
    # print(textElement.keywords)
    dataName = params.get('saveAs')
    textType = params.get('type')
    JsonName = params.get('topicName')
    # Use an extractor instance, as elsewhere in this module, instead of passing
    # a bare True in place of self.
    keyword_extractor = KeywordExtractor()
    keywords = keyword_extractor.extract_keywords(text=text, lang=lang)
    keywordsWithmax = keyword_extractor.extract_keywords(text=text, lang=lang, max_keywords=15)
    return jsonify(transform_for_visualization(dataName, JsonName, textType,
                                               keywords=keywords, keywordsWithmax=keywordsWithmax,
                                               lang=lang))
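A hedged client-side sketch of the request keywordsPost reads; field names mirror the params.get calls above, while the endpoint path and values are illustrative assumptions:
import requests  # assumed client library for this sketch

payload = {
    "text": "Keyword extraction works on any plain text.",
    "language": "en",
    "pos-tagging": True,   # read above but currently unused
    "bigrams": False,      # read above but currently unused
    "threshold": 0.5,      # read above but currently unused
    "saveAs": "demo",
    "type": "document",
    "topicName": "demo-topic"
}
requests.post("http://localhost:5000/keywords", json=payload)  # path is an assumption
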
def computeCnaGraphPost():
    params = json.loads(request.get_data())
    texts = [doc["text"] for doc in params.get('texts')]
    languageString = params.get('lang')
    lang = str_to_lang(languageString)
    models = params.get('models')
    dataName = params.get('saveAs')
    JsonName = params.get('topicName')
    return compute_graph(dataName, JsonName, texts, lang, models)
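A hedged payload sketch for computeCnaGraphPost; the shape of each "models" entry (corpus plus model name) is an assumption borrowed from the cnaModels construction in csclPost below:
payload = {
    "texts": [{"text": "First document."}, {"text": "Second document."}],
    "lang": "en",
    "models": [{"model": "word2vec", "corpus": "coca"}],  # shape assumed, not confirmed by this file
    "saveAs": "demo",
    "topicName": "demo-topic"
}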
Example #4
def compute_indices_format(text):
    # Compute document-level complexity indices, keyed by their string names.
    lang = str_to_lang("de")
    model = get_default_model(lang)
    # Alternative: the word2vec model trained on the German Wikipedia corpus.
    # model = VECTOR_MODELS[lang][CorporaEnum.WIKI][VectorModelType.WORD2VEC](
    #     name=CorporaEnum.WIKI.value, lang=Lang.DE)
    doc = Document(lang, text)
    cna_graph = CnaGraph(docs=doc, models=[model])
    compute_indices(doc=doc, cna_graph=cna_graph)

    return {str(key): value for key, value in doc.indices.items()}
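A brief usage sketch, assuming the same module-level imports as above; the keys are the str() forms of the complexity index objects and depend on the installed rb version:
indices = compute_indices_format("Ein kurzer deutscher Beispieltext.")
for name, value in list(indices.items())[:5]:
    print(name, value)
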
def textSimilarityPost():
    params = json.loads(request.get_data())
    corpus = params.get('corpus')
    if corpus is None:
        corpus = 'le_monde_small'
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    texts = params.get('texts')

    # Load whichever of the three model types is available on disk for this corpus.
    vectorModels = []
    for model_class in (LSA, LDA, Word2Vec):
        try:
            vectorModels.append(model_class(corpus, lang))
        except FileNotFoundError as inst:
            print(inst)

    # Parse each text once, then score every unordered pair with every model.
    documents = [Document(lang, text) for text in texts]
    pairs = []
    for i in range(len(documents)):
        for j in range(i + 1, len(documents)):
            scores = []
            for vectorModel in vectorModels:
                similarityScore = vectorModel.similarity(documents[i], documents[j])
                scores.append(ScoreDTO(vectorModel.type.name, similarityScore))
            pairs.append(PairDTO(i, j, scores))

    scoresDTO = ScoresDTO(lang, corpus, pairs)
    textSimilarityResponse = TextSimilarityResponse(scoresDTO, "", True)
    return textSimilarityResponse.toJSON()
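A hedged request sketch for textSimilarityPost; with n texts the response contains n * (n - 1) / 2 pairs, each scored by every model that loaded successfully:
payload = {
    "corpus": "le_monde_small",  # optional; this default is used when the field is missing
    "language": "fr",
    "texts": ["Premier texte.", "Deuxieme texte.", "Troisieme texte."]
}
# 3 texts -> pairs (0, 1), (0, 2), (1, 2), each with up to three ScoreDTOs (LSA, LDA, word2vec)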
Example #6
def sentiment_post(request):
    params = json.loads(request.get_data())
    text = params.get("text")
    lang = str_to_lang(params.get("lang"))
    model_name = params["model"] if "model" in params else "base"
    model = SentimentModelsCache.get_instance().get_model(lang, model_name)
    if not model:
        return SentimentResponse(data="",
                                 errorMsg="Model doesn't exist",
                                 success=False).toJSON()
    prediction = model.process_text(text)
    return SentimentResponse(data={
        "prediction": prediction[0]
    },
                             errorMsg="",
                             success=True).toJSON()
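A hedged request sketch for sentiment_post; "model" is optional and falls back to "base":
payload = {
    "text": "I really enjoyed this course.",
    "lang": "en",
    "model": "base"  # optional
}
# On success the response data carries {"prediction": ...}; unknown models yield success=False.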
Example #7
def compute_indices_format_array(questions):
    lang = str_to_lang("de")
    model = get_default_model(lang)
    # Alternative: the word2vec model trained on the German Wikipedia corpus.
    # model = VECTOR_MODELS[lang][CorporaEnum.WIKI][VectorModelType.WORD2VEC](
    #     name=CorporaEnum.WIKI.value, lang=Lang.DE)
    result = []
    for question in questions:
        # Index the expert (reference) answer.
        expert = Document(lang, question['expert'])
        cna_graph = CnaGraph(docs=expert, models=[model])
        compute_indices(doc=expert, cna_graph=cna_graph)

        # Index the student answer.
        doc = Document(lang, question['text'])
        cna_graph = CnaGraph(docs=doc, models=[model])
        compute_indices(doc=doc, cna_graph=cna_graph)

        # Paragraph texts for the feedback view come from the expert answer.
        block = []
        for b in expert.get_blocks():
            sent = [s.text for s in b.get_sentences()]
            block.append({'text': b.text, 'sentences': sent})

        feedback_text = {'doc': doc.text, 'blocks': block}
        sentences = [sent.indices for sent in doc.get_sentences()]
        blocks = [b.indices for b in doc.get_blocks()]

        doc_indices = {
            'text': feedback_text,
            'indices': {
                'document': expert.indices,
                'sentence': sentences,
                'block': blocks
            }
        }
        level = predictLevel(doc_indices['indices']['document'])
        result.append({
            'feedback': compare_feedback(expert, doc_indices),
            'level': level
        })

    return result
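compute_indices_format_array expects one dict per question, pairing the expert (reference) answer with the student answer; a brief input sketch with illustrative German strings:
questions = [{
    "expert": "Musterloesung der Lehrkraft ...",
    "text": "Antwort des Studierenden ..."
}]
result = compute_indices_format_array(questions)
# each entry holds 'feedback' (expert vs. student comparison) and a predicted 'level'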
Example #8
def amocPost():
    params = json.loads(request.get_data())
    text = params.get("text")
    semantic_model = params.get("semanticModel")
    min_activation_threshold = float(params.get("minActivationThreshold"))
    max_active_concepts = int(params.get("maxActiveConcepts"))
    max_semantic_expand = int(params.get("maxSemanticExpand"))
    languageString = params.get('language')
    lang = str_to_lang(languageString)

    w2v = cache.get_model(VectorModelType.WORD2VEC, semantic_model, lang)
    lda = cache.get_model(VectorModelType.LDA, semantic_model, lang)
    lsa = cache.get_model(VectorModelType.LSA, semantic_model, lang)
    semantic_models = [w2v, lda, lsa]
    cms = ComprehensionModelService(semantic_models, lang,
                                    min_activation_threshold, max_active_concepts, max_semantic_expand)

    result = cms.run(text)
    amoc_response = AmocResponse(result, "", True)

    return amoc_response.toJSON()
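A hedged request sketch for amocPost; the numeric fields are coerced with float/int above, so strings or numbers both work, and the corpus name is an illustrative assumption:
payload = {
    "text": "A short story for the comprehension model.",
    "semanticModel": "coca",           # corpus name; illustrative
    "minActivationThreshold": "0.3",
    "maxActiveConcepts": "20",
    "maxSemanticExpand": "5",
    "language": "en"
}
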
def textualComplexityPost():
    params = json.loads(request.get_data())
    text = params.get('text')
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    # lsa = params.get('lsa')
    # lda = params.get('lda')
    # w2v = params.get('w2v')

    # Pick the word2vec corpus matching the request language; unsupported
    # languages raise a KeyError here.
    corpora = {
        Lang.RO: CorporaEnum.README,
        Lang.EN: CorporaEnum.COCA,
        Lang.ES: CorporaEnum.JOSE_ANTONIO,
        Lang.FR: CorporaEnum.LE_MONDE,
        Lang.RU: CorporaEnum.RNC_WIKIPEDIA,
        Lang.DE: CorporaEnum.WIKI,
    }
    corpusEnum = corpora[lang]
    vector_model = VECTOR_MODELS[lang][corpusEnum][VectorModelType.WORD2VEC](
        name=corpusEnum.value, lang=lang)

    document = Document(lang=lang, text=text)
    cna_graph = CnaGraph(docs=document, models=[vector_model])
    compute_indices(doc=document, cna_graph=cna_graph)

    # Group document-level indices by category.
    categoriesList = []
    complexityIndices = {}
    for key, value in document.indices.items():
        categoryName = key.category.name
        if categoryName not in categoriesList:
            categoriesList.append(categoryName)

        complexityIndexDTO = ComplexityIndexDTO(repr(key),
                                                float(value),
                                                type="document")
        if categoryName not in complexityIndices:
            complexityIndices[categoryName] = []
        complexityIndices[categoryName].append(complexityIndexDTO)

    # TODO: train a classifier for the complexity level; until then return a placeholder.
    # loaded_model = pickle.load(open("rb_api/textual_complexity/lsvc.sav", 'rb'))
    # data = {repr(key): [v] for key, v in document.indices.items()}
    # item = pd.DataFrame.from_dict(data)
    # level = loaded_model.predict(item)[0]
    level = "C"

    # Group paragraph-level indices by category.
    for paragraph_id, paragraph in enumerate(document.components):
        for key, value in paragraph.indices.items():
            categoryName = key.category.name
            if categoryName not in categoriesList:
                categoriesList.append(categoryName)

            complexityIndexDTO = ComplexityIndexDTO(
                repr(key),
                float(value),
                type="paragraph",
                paragraph_index=paragraph_id)
            if categoryName not in complexityIndices:
                complexityIndices[categoryName] = []
            complexityIndices[categoryName].append(complexityIndexDTO)

    # Group sentence-level indices by category.
    for paragraph_id, paragraph in enumerate(document.components):
        for sentence_id, sentence in enumerate(paragraph.components):
            for key, value in sentence.indices.items():
                categoryName = key.category.name
                if categoryName not in categoriesList:
                    categoriesList.append(categoryName)

                complexityIndexDTO = ComplexityIndexDTO(
                    repr(key),
                    float(value),
                    type="sentence",
                    paragraph_index=paragraph_id,
                    sentence_index=sentence_id)
                if categoryName not in complexityIndices:
                    complexityIndices[categoryName] = []
                complexityIndices[categoryName].append(complexityIndexDTO)

    # Wrap each category's indices in a response DTO.
    complexityIndicesResponse = [
        ComplexityIndicesDTO(category, indices)
        for category, indices in complexityIndices.items()
    ]
    texts = [[sentence.text for sentence in paragraph.components]
             for paragraph in document.components]

    textualComplexityDataDTO = TextualComplexityDataDTO(
        languageString, level, texts, categoriesList,
        complexityIndicesResponse)

    textualComplexityResponse = TextualComplexityResponse(
        textualComplexityDataDTO, "", True)
    return textualComplexityResponse.toJSON()
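A hedged request sketch for textualComplexityPost; only the six languages mapped above are supported:
payload = {
    "text": "The text whose lexical, syntactic and semantic complexity is profiled.",
    "language": "en"  # one of: ro, en, es, fr, ru, de
}
# The response groups document-, paragraph- and sentence-level indices by category,
# together with the placeholder level "C".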
Example #10
def csclPost():
    params = json.loads(request.get_data())
    csclFile = params.get('cscl-file')
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    lsaCorpus = params.get('lsa')
    ldaCorpus = params.get('lda')
    word2vecCorpus = params.get('w2v')

    basepath = path.dirname(__file__)
    filepath = path.abspath(path.join(basepath, "..", "..", "upload",
                                      csclFile))
    conv_thread = load_from_xml(filepath)
    conv = Conversation(lang=lang,
                        conversation_thread=conv_thread,
                        apply_heuristics=False)
    # Build a vector model for each corpus name that was actually provided.
    vectorModels = []
    if lsaCorpus:
        vectorModels.append(
            create_vector_model(lang, VectorModelType.from_str("lsa"), lsaCorpus))
    if ldaCorpus:
        vectorModels.append(
            create_vector_model(lang, VectorModelType.from_str("lda"), ldaCorpus))
    if word2vecCorpus:
        vectorModels.append(
            create_vector_model(lang, VectorModelType.from_str("word2vec"), word2vecCorpus))
    conv.graph = CnaGraph(docs=[conv], models=vectorModels)

    participant_list = conv.get_participants()
    names = [p.get_id() for p in participant_list]

    conceptMaps = {'LSA': None, 'LDA': None, 'WORD2VEC': None}
    # Begin Concept Map
    for vectorModel in vectorModels:
        keywords_extractor = KeywordExtractor()
        keywords = keywords_extractor.extract_keywords(
            text=conv.text, lang=lang, vector_model=vectorModel)
        conceptMap = {
            "nodeList": [],
            "edgeList": [],
        }
        for score, word in keywords:
            conceptMap["nodeList"].append({
                "type": "Word",
                "uri": word,
                "displayName": word,
                "active": True,
                "degree": score
            })
        # Cache each keyword's vector, then link every keyword pair by semantic similarity.
        vectors = {}
        for _, keyword in keywords:
            vectors[keyword] = vectorModel.get_vector(keyword)
        for _, keyword1 in keywords:
            for _, keyword2 in keywords:
                conceptMap["edgeList"].append({
                    "edgeType": "SemanticDistance",
                    "score": vectorModel.similarity(vectors[keyword1],
                                                    vectors[keyword2]),
                    "sourceUri": keyword1,
                    "targetUri": keyword2
                })
        conceptMaps[vectorModel.type.name] = conceptMap
    # End Concept Map

    evaluate_interaction(conv)
    evaluate_involvement(conv)
    perform_sna(conv, False)
    evaluate_textual_complexity(conv)

    # Begin Participant Interaction Graph
    participantInteractionGraph = {
        "nodeList": [],
        "edgeList": [],
    }
    nameIndex = {}
    for i, p in enumerate(participant_list):
        participantInteractionGraph["nodeList"].append({
            "type": "Author",
            "uri": i,
            "displayName": p.get_id(),
            "active": True,
            "degree": p.get_index(CNAIndices.INDEGREE) +
                      p.get_index(CNAIndices.OUTDEGREE)
        })
        nameIndex[p.get_id()] = i

    for p1 in participant_list:
        for p2 in participant_list:
            participantInteractionGraph["edgeList"].append({
                "edgeType": "SemanticDistance",
                "score": conv.get_score(p1.get_id(), p2.get_id()),
                "sourceUri": nameIndex[p1.get_id()],
                "targetUri": nameIndex[p2.get_id()]
            })
    # End Participant Interaction Graph

    # Begin CSCL Indices
    csclIndices = {}

    contributions = conv.get_contributions()
    noParticipantContributions = {}
    for p in participant_list:
        noParticipantContributions[p.get_id()] = 0
    for contribution in contributions:
        noParticipantContributions[contribution.get_participant().get_id()] += 1

    for p in participant_list:
        # Social KB aggregated from this participant's contributions.
        participantDict = {
            "CONTRIBUTIONS_SCORE": p.get_index(CNAIndices.CONTRIBUTIONS_SCORE),
            # "INTERACTION_SCORE": p.get_index(CNAIndices.INTERACTION_SCORE),
            "SOCIAL_KB": p.get_index(CNAIndices.SOCIAL_KB),
            "OUTDEGREE": p.get_index(CNAIndices.OUTDEGREE),
            "INDEGREE": p.get_index(CNAIndices.INDEGREE),
            "NO_CONTRIBUTIONS": noParticipantContributions[p.get_id()],
            "CLOSENESS": p.get_index(CNAIndices.CLOSENESS),
            "BETWEENNESS": p.get_index(CNAIndices.BETWEENNESS),
            "EIGENVECTOR": p.get_index(CNAIndices.EIGENVECTOR),
        }
        csclIndices[p.get_id()] = participantDict
    # End CSCL Indices

    # Begin CSCL Descriptions
    csclIndicesDescriptions = {}
    for index in CsclIndicesDescriptions:
        csclIndicesDescriptions[index.name] = index.value
    # End CSCL Descriptions

    # Participant Evolution
    participantEvolution = []
    importance = conv.graph.importance
    participantImportance = {}
    for participant in participant_list:
        participantImportance[participant.get_id()] = 0

    for index, contribution in enumerate(contributions):
        for participant in participant_list:
            if participant == contribution.get_participant():
                # Sum of edge weights; should eventually be computed in the core.
                participantImportance[participant.get_id()] += importance[contribution]
            nodeDict = {
                "nodeName": participant.get_id(),
                "x": index,
                "y": participantImportance[participant.get_id()]
            }
            participantEvolution.append(nodeDict)
    # End Participant Evolution

    # Social KB
    socialKB = [0] * len(contributions)

    for index1, contribution1 in enumerate(contributions):
        for contribution2 in contributions[:index1]:
            weight = get_block_importance(conv.graph.filtered_graph,
                                          contribution1, contribution2)
            if weight > 0 and contribution1.get_participant() != contribution2.get_participant():
                socialKB[index1] += weight

    socialKBResponse = []
    for index, contribution in enumerate(contributions):
        nodeDict = {"nodeName": "", "x": index, "y": socialKB[index]}
        socialKBResponse.append(nodeDict)
    # End Social KB

    # Per-contribution table: for each contribution report its social KB,
    # local importance, and the running totals.
    sumImportance = 0
    sumKB = 0
    contributionsIndices = {
        'contributions': [],
        'total': {
            'SOCIAL_KB': 0,
            'LOCAL_IMPORTANCE': 0
        }
    }
    for index, contribution in enumerate(contributions):
        sumKB += socialKB[index]
        sumImportance += importance[contribution]
        rawContrib = contribution.get_raw_contribution()
        contributionDict = {
            "participant": contribution.get_participant().get_id(),
            "genid": rawContrib['id'],
            "ref": rawContrib['parent_id'],
            "timestamp": contribution.get_timestamp().strftime('%Y-%m-%d %H:%M:%S.%f %Z'),
            "text": rawContrib['text'],
            "SOCIAL_KB": socialKB[index],
            "LOCAL_IMPORTANCE": importance[contribution],
        }
        contributionsIndices['contributions'].append(contributionDict)
    contributionsIndices['total'] = {
        "SOCIAL_KB": sumKB,
        "LOCAL_IMPORTANCE": sumImportance,
    }

    contributionsTexts = [
        contribution.get_raw_contribution()['text']
        for contribution in contributions
    ]
    cnaModels = []
    for model in vectorModels:
        cnaModel = {'corpus': model.corpus, 'model': model.type.name.lower()}
        cnaModels.append(cnaModel)
    textLabels = ['Utterance', 'Sentence']
    cnaGraph = compute_graph_cscl(texts=contributionsTexts,
                                  lang=lang,
                                  models=cnaModels,
                                  textLabels=textLabels)

    csclDataDTO = CsclDataDTO(languageString, conceptMaps, csclIndices,
                              csclIndicesDescriptions, participantEvolution,
                              participantInteractionGraph, socialKBResponse,
                              contributionsIndices, cnaGraph)

    csclResponse = CsclResponse(csclDataDTO, "", True)
    try:
        jsonString = csclResponse.toJSON()
    except Exception as e:
        print("Error when serializing")
        raise e
    return jsonString
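A hedged request sketch for csclPost; 'cscl-file' names an XML conversation already present in the upload folder, and an empty string skips the corresponding model:
payload = {
    "cscl-file": "conversation.xml",  # resolved against ../../upload relative to this module
    "language": "en",
    "lsa": "coca",  # "" skips the LSA model
    "lda": "",      # skipped
    "w2v": "coca"   # "" skips the word2vec model
}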