def compute_textual_indices(text):
    lang = str_to_lang("de")
    model = get_default_model(lang)
    # model = VECTOR_MODELS[lang][CorporaEnum.WIKI][VectorModelType.WORD2VEC](
    #     name=CorporaEnum.WIKI.value, lang=Lang.DE)
    doc = Document(lang, text)
    cna_graph = CnaGraph(docs=doc, models=[model])
    compute_indices(doc=doc, cna_graph=cna_graph)
    block = []
    for b in doc.get_blocks():
        sent = [s.text for s in b.get_sentences()]
        block.append({'text': b.text, 'sentences': sent})
    feedback_text = {'doc': doc.text, 'blocks': block}
    sentences = [sent.indices for sent in doc.get_sentences()]
    blocks = [b.indices for b in doc.get_blocks()]
    return {
        'text': feedback_text,
        'indices': {
            'document': doc.indices,
            'sentence': sentences,
            'block': blocks
        }
    }
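def example_compute_textual_indices():
    # Hedged usage sketch for compute_textual_indices: it assumes the German
    # pipeline and the default vector model are installed locally, and the
    # sample text is a placeholder. The accessed keys mirror the dict built above.
    result = compute_textual_indices("Ein kurzer Beispieltext. Er hat zwei Sätze.")
    print(result['text']['blocks'])        # paragraphs with their sentences
    print(result['indices']['document'])   # document-level complexity indices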
def keywordsPost():
    """TODO: endpoint not fully working yet."""
    params = json.loads(request.get_data())
    # posTagging, bigrams, threshold and plotName are parsed but not used yet.
    posTagging = params.get('pos-tagging')
    bigrams = params.get('bigrams')
    threshold = params.get('threshold')
    plotName = "wordnet"  # plotName = params.get('saveAs')
    text = params.get('text')
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    dataName = params.get('saveAs')
    textType = params.get('type')
    JsonName = params.get('topicName')
    keywords = KeywordExtractor.extract_keywords(True, text=text, lang=lang)
    keywordsWithmax = KeywordExtractor.extract_keywords(
        True, text=text, lang=lang, max_keywords=15)
    return jsonify(
        transform_for_visualization(dataName, JsonName, textType,
                                    keywords=keywords,
                                    keywordsWithmax=keywordsWithmax,
                                    lang=lang))
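# Illustrative request body for keywordsPost (keys mirror the params read above;
# values are placeholders, and the endpoint itself is still marked as not working):
EXAMPLE_KEYWORDS_REQUEST = {
    "text": "Sample text to extract keywords from.",
    "language": "en",
    "pos-tagging": True,
    "bigrams": False,
    "threshold": 0.3,
    "saveAs": "keywords_demo",
    "topicName": "demo_topic",
    "type": "document",
}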
def computeCnaGraphPost():
    params = json.loads(request.get_data())
    texts = [doc["text"] for doc in params.get('texts')]
    languageString = params.get('lang')
    lang = str_to_lang(languageString)
    models = params.get('models')
    dataName = params.get('saveAs')
    JsonName = params.get('topicName')
    return compute_graph(dataName, JsonName, texts, lang, models)
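# Illustrative request body for computeCnaGraphPost (keys mirror the params read
# above; the shape of each "models" entry is an assumption inferred from the
# cnaModels list built in csclPost, and all values are placeholders):
EXAMPLE_CNA_GRAPH_REQUEST = {
    "texts": [{"text": "First document."}, {"text": "Second document."}],
    "lang": "en",
    "models": [{"model": "word2vec", "corpus": "coca"}],
    "saveAs": "demo_graph",
    "topicName": "demo_topic",
}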
def compute_indices_format(text):
    lang = str_to_lang("de")
    model = get_default_model(lang)
    doc = Document(lang, text)
    cna_graph = CnaGraph(docs=doc, models=[model])
    compute_indices(doc=doc, cna_graph=cna_graph)
    # Stringify the index keys so the mapping is JSON-serializable.
    return {str(key): value for key, value in doc.indices.items()}
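# Sketch of the shape returned by compute_indices_format: a flat,
# JSON-serializable mapping from index name to value, e.g.
#   compute_indices_format("Ein Beispieltext.")  ->  {"AvgSentenceLength": 3.0, ...}
# (the index name shown is an illustrative placeholder, not a guaranteed key).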
def textSimilarityPost():
    params = json.loads(request.get_data())
    corpus = params.get('corpus') if params.get('corpus') is not None else 'le_monde_small'
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    texts = params.get('texts')
    # Load every semantic model available for this corpus; skip those whose
    # files are missing on disk.
    vectorModels = []
    for model_class in (LSA, LDA, Word2Vec):
        try:
            vectorModels.append(model_class(corpus, lang))
        except FileNotFoundError as inst:
            print(inst)
    documents = [Document(lang, text) for text in texts]
    pairs = []
    for i, document1 in enumerate(documents):
        for j in range(i + 1, len(documents)):
            document2 = documents[j]
            scores = []
            for vectorModel in vectorModels:
                similarityScore = vectorModel.similarity(document1, document2)
                scores.append(ScoreDTO(vectorModel.type.name, similarityScore))
            pairs.append(PairDTO(i, j, scores))
    scoresDTO = ScoresDTO(lang, corpus, pairs)
    textSimilarityResponse = TextSimilarityResponse(scoresDTO, "", True)
    return textSimilarityResponse.toJSON()
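# Illustrative request body for textSimilarityPost (keys mirror the params read
# above; "corpus" is optional and falls back to "le_monde_small"):
EXAMPLE_TEXT_SIMILARITY_REQUEST = {
    "language": "fr",
    "corpus": "le_monde_small",
    "texts": ["Premier texte.", "Deuxième texte."],
}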
def sentiment_post(request):
    params = json.loads(request.get_data())
    text = params.get("text")
    lang = str_to_lang(params.get("lang"))
    model_name = params.get("model", "base")
    model = SentimentModelsCache.get_instance().get_model(lang, model_name)
    if not model:
        return SentimentResponse(data="",
                                 errorMsg="Model doesn't exist",
                                 success=False).toJSON()
    prediction = model.process_text(text)
    return SentimentResponse(data={"prediction": prediction[0]},
                             errorMsg="",
                             success=True).toJSON()
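# Illustrative request body for sentiment_post (keys mirror the params read
# above; "model" is optional and defaults to "base"):
EXAMPLE_SENTIMENT_REQUEST = {
    "text": "I really enjoyed this course.",
    "lang": "en",
    "model": "base",
}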
def compute_indices_format_array(questions):
    lang = str_to_lang("de")
    model = get_default_model(lang)
    result = []
    for question in questions:
        # Indices for the expert (reference) answer.
        expert = Document(lang, question['expert'])
        cna_graph = CnaGraph(docs=expert, models=[model])
        compute_indices(doc=expert, cna_graph=cna_graph)
        # Indices for the submitted answer.
        doc = Document(lang, question['text'])
        cna_graph = CnaGraph(docs=doc, models=[model])
        compute_indices(doc=doc, cna_graph=cna_graph)
        block = []
        for b in doc.get_blocks():
            sent = [s.text for s in b.get_sentences()]
            block.append({'text': b.text, 'sentences': sent})
        feedback_text = {'doc': doc.text, 'blocks': block}
        sentences = [sent.indices for sent in doc.get_sentences()]
        blocks = [b.indices for b in doc.get_blocks()]
        doc_indices = {
            'text': feedback_text,
            'indices': {
                'document': doc.indices,
                'sentence': sentences,
                'block': blocks
            }
        }
        level = predictLevel(doc_indices['indices']['document'])
        result.append({
            'feedback': compare_feedback(expert, doc_indices),
            'level': level
        })
    return result
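# Illustrative input for compute_indices_format_array (structure inferred from
# the question['expert'] / question['text'] accesses above; texts are placeholders):
EXAMPLE_QUESTIONS = [{
    "expert": "Die Musterlösung der Lehrkraft.",
    "text": "Die eingereichte Antwort.",
}]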
def amocPost():
    params = json.loads(request.get_data())
    text = params.get("text")
    semantic_model = params.get("semanticModel")
    min_activation_threshold = float(params.get("minActivationThreshold"))
    max_active_concepts = int(params.get("maxActiveConcepts"))
    max_semantic_expand = int(params.get("maxSemanticExpand"))
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    w2v = cache.get_model(VectorModelType.WORD2VEC, semantic_model, lang)
    lda = cache.get_model(VectorModelType.LDA, semantic_model, lang)
    lsa = cache.get_model(VectorModelType.LSA, semantic_model, lang)
    semantic_models = [w2v, lda, lsa]
    cms = ComprehensionModelService(semantic_models, lang,
                                    min_activation_threshold,
                                    max_active_concepts,
                                    max_semantic_expand)
    result = cms.run(text)
    amoc_response = AmocResponse(result, "", True)
    return amoc_response.toJSON()
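# Illustrative request body for amocPost (keys mirror the params read above;
# values are placeholders, and the numeric fields are cast with float()/int()):
EXAMPLE_AMOC_REQUEST = {
    "text": "A short story to run through the comprehension model.",
    "language": "en",
    "semanticModel": "coca",
    "minActivationThreshold": 0.3,
    "maxActiveConcepts": 20,
    "maxSemanticExpand": 5,
}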
def textualComplexityPost():
    params = json.loads(request.get_data())
    text = params.get('text')
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    # Pick the default word2vec corpus for the requested language.
    if lang is Lang.RO:
        vector_model = VECTOR_MODELS[lang][CorporaEnum.README][
            VectorModelType.WORD2VEC](name=CorporaEnum.README.value, lang=lang)
    elif lang is Lang.EN:
        vector_model = VECTOR_MODELS[lang][CorporaEnum.COCA][
            VectorModelType.WORD2VEC](name=CorporaEnum.COCA.value, lang=lang)
    elif lang is Lang.ES:
        vector_model = VECTOR_MODELS[lang][CorporaEnum.JOSE_ANTONIO][
            VectorModelType.WORD2VEC](name=CorporaEnum.JOSE_ANTONIO.value,
                                      lang=lang)
    elif lang is Lang.FR:
        vector_model = VECTOR_MODELS[lang][CorporaEnum.LE_MONDE][
            VectorModelType.WORD2VEC](name=CorporaEnum.LE_MONDE.value, lang=lang)
    elif lang is Lang.RU:
        vector_model = VECTOR_MODELS[lang][CorporaEnum.RNC_WIKIPEDIA][
            VectorModelType.WORD2VEC](name=CorporaEnum.RNC_WIKIPEDIA.value,
                                      lang=lang)
    elif lang is Lang.DE:
        vector_model = VECTOR_MODELS[lang][CorporaEnum.WIKI][
            VectorModelType.WORD2VEC](name=CorporaEnum.WIKI.value, lang=lang)
    document = Document(lang=lang, text=text)
    cna_graph = CnaGraph(docs=document, models=[vector_model])
    compute_indices(doc=document, cna_graph=cna_graph)

    categoriesList = []
    complexityIndices = {}

    def collect_index(key, value, **location):
        # Group every complexity index under its category name; location holds
        # the granularity (type plus paragraph/sentence positions).
        categoryName = key.category.name
        if categoryName not in categoriesList:
            categoriesList.append(categoryName)
        indexDTO = ComplexityIndexDTO(repr(key), float(value), **location)
        complexityIndices.setdefault(categoryName, []).append(indexDTO)

    for key, value in document.indices.items():
        collect_index(key, value, type="document")

    # TODO: train a model to predict the complexity level; "C" is a placeholder.
    # data = {repr(key): [value] for key, value in document.indices.items()}
    # loaded_model = pickle.load(open("rb_api/textual_complexity/lsvc.sav", 'rb'))
    # item = pd.DataFrame.from_dict(data)
    # level = loaded_model.predict(item)[0]
    level = "C"

    for paragraph_id, paragraph in enumerate(document.components):
        for key, value in paragraph.indices.items():
            collect_index(key, value, type="paragraph",
                          paragraph_index=paragraph_id)

    for paragraph_id, paragraph in enumerate(document.components):
        for sentence_id, sentence in enumerate(paragraph.components):
            for key, value in sentence.indices.items():
                collect_index(key, value, type="sentence",
                              paragraph_index=paragraph_id,
                              sentence_index=sentence_id)

    complexityIndicesResponse = [
        ComplexityIndicesDTO(category, indices)
        for category, indices in complexityIndices.items()
    ]
    texts = [[sentence.text for sentence in paragraph.components]
             for paragraph in document.components]
    textualComplexityDataDTO = TextualComplexityDataDTO(
        languageString, level, texts, categoriesList, complexityIndicesResponse)
    textualComplexityResponse = TextualComplexityResponse(
        textualComplexityDataDTO, "", True)
    return textualComplexityResponse.toJSON()
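# Illustrative request body for textualComplexityPost (keys mirror the params
# read above; only the languages covered by the if/elif chain, namely ro, en,
# es, fr, ru and de, select a vector model):
EXAMPLE_TEXTUAL_COMPLEXITY_REQUEST = {
    "text": "The text whose complexity indices should be computed.",
    "language": "en",
}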
def csclPost():
    params = json.loads(request.get_data())
    csclFile = params.get('cscl-file')
    languageString = params.get('language')
    lang = str_to_lang(languageString)
    lsaCorpus = params.get('lsa')
    ldaCorpus = params.get('lda')
    word2vecCorpus = params.get('w2v')

    basepath = path.dirname(__file__)
    filepath = path.abspath(path.join(basepath, "..", "..", "upload", csclFile))
    conv_thread = load_from_xml(filepath)
    conv = Conversation(lang=lang,
                        conversation_thread=conv_thread,
                        apply_heuristics=False)

    vectorModels = []
    if lsaCorpus:
        vectorModels.append(
            create_vector_model(lang, VectorModelType.from_str("lsa"), lsaCorpus))
    if ldaCorpus:
        vectorModels.append(
            create_vector_model(lang, VectorModelType.from_str("lda"), ldaCorpus))
    if word2vecCorpus:
        vectorModels.append(
            create_vector_model(lang, VectorModelType.from_str("word2vec"),
                                word2vecCorpus))
    conv.graph = CnaGraph(docs=[conv], models=vectorModels)
    participant_list = conv.get_participants()

    # Begin Concept Map
    conceptMaps = {'LSA': None, 'LDA': None, 'WORD2VEC': None}
    for vectorModel in vectorModels:
        keywords_extractor = KeywordExtractor()
        keywords = keywords_extractor.extract_keywords(
            text=conv.text, lang=lang, vector_model=vectorModel)
        conceptMap = {
            "nodeList": [],
            "edgeList": [],
        }
        for score, word in keywords:
            conceptMap["nodeList"].append({
                "type": "Word",
                "uri": word,
                "displayName": word,
                "active": True,
                "degree": score
            })
        vectors = {keyword: vectorModel.get_vector(keyword)
                   for _, keyword in keywords}
        for _, keyword1 in keywords:
            for _, keyword2 in keywords:
                conceptMap["edgeList"].append({
                    "edgeType": "SemanticDistance",
                    "score": vectorModel.similarity(vectors[keyword1],
                                                    vectors[keyword2]),
                    "sourceUri": keyword1,
                    "targetUri": keyword2
                })
        conceptMaps[vectorModel.type.name] = conceptMap
    # End Concept Map

    evaluate_interaction(conv)
    evaluate_involvement(conv)
    perform_sna(conv, False)
    evaluate_textual_complexity(conv)

    # Begin Participant Interaction Graph
    participantInteractionGraph = {
        "nodeList": [],
        "edgeList": [],
    }
    nameIndex = {}
    for i, p in enumerate(participant_list):
        participantInteractionGraph["nodeList"].append({
            "type": "Author",
            "uri": i,
            "displayName": p.get_id(),
            "active": True,
            "degree": p.get_index(CNAIndices.INDEGREE) +
                      p.get_index(CNAIndices.OUTDEGREE)
        })
        nameIndex[p.get_id()] = i
    for p1 in participant_list:
        for p2 in participant_list:
            participantInteractionGraph["edgeList"].append({
                "edgeType": "SemanticDistance",
                "score": conv.get_score(p1.get_id(), p2.get_id()),
                "sourceUri": nameIndex[p1.get_id()],
                "targetUri": nameIndex[p2.get_id()]
            })
    # End Participant Interaction Graph

    # Begin CSCL Indices
    csclIndices = {}
    contributions = conv.get_contributions()
    noParticipantContributions = {p.get_id(): 0 for p in participant_list}
    for contribution in contributions:
        noParticipantContributions[contribution.get_participant().get_id()] += 1
    for p in participant_list:
        # Social KB is aggregated from the participant's own contributions.
        csclIndices[p.get_id()] = {
            "CONTRIBUTIONS_SCORE": p.get_index(CNAIndices.CONTRIBUTIONS_SCORE),
            # "INTERACTION_SCORE": p.get_index(CNAIndices.INTERACTION_SCORE),
            "SOCIAL_KB": p.get_index(CNAIndices.SOCIAL_KB),
            "OUTDEGREE": p.get_index(CNAIndices.OUTDEGREE),
            "INDEGREE": p.get_index(CNAIndices.INDEGREE),
            "NO_CONTRIBUTIONS": noParticipantContributions[p.get_id()],
            "CLOSENESS": p.get_index(CNAIndices.CLOSENESS),
            "BETWEENNESS": p.get_index(CNAIndices.BETWEENNESS),
            "EIGENVECTOR": p.get_index(CNAIndices.EIGENVECTOR),
        }
    # End CSCL Indices

    # Begin CSCL Descriptions
    csclIndicesDescriptions = {index.name: index.value
                               for index in CsclIndicesDescriptions}
    # End CSCL Descriptions

    # Begin Participant Evolution
    participantEvolution = []
    importance = conv.graph.importance
    participantImportance = {p.get_id(): 0 for p in participant_list}
    for index, contribution in enumerate(contributions):
        for participant in participant_list:
            if participant == contribution.get_participant():
                # Sum of edge weights; should eventually be computed in the core.
                participantImportance[participant.get_id()] += \
                    importance[contribution]
            participantEvolution.append({
                "nodeName": participant.get_id(),
                "x": index,
                "y": participantImportance[participant.get_id()]
            })
    # End Participant Evolution

    # Begin Social KB
    socialKB = [0] * len(contributions)
    for index1, contribution1 in enumerate(contributions):
        for contribution2 in contributions[:index1]:
            weight = get_block_importance(conv.graph.filtered_graph,
                                          contribution1, contribution2)
            if weight > 0 and (contribution1.get_participant() !=
                               contribution2.get_participant()):
                socialKB[index1] += weight
    socialKBResponse = [{"nodeName": "", "x": index, "y": value}
                        for index, value in enumerate(socialKB)]
    # End Social KB

    # Per-contribution table: for each utterance report its social KB and local
    # importance, plus the totals over the whole conversation.
    sumImportance = 0
    sumKB = 0
    contributionsIndices = {
        'contributions': [],
        'total': {
            'SOCIAL_KB': 0,
            'LOCAL_IMPORTANCE': 0
        }
    }
    for index, contribution in enumerate(contributions):
        sumKB += socialKB[index]
        sumImportance += importance[contribution]
        rawContrib = contribution.get_raw_contribution()
        contributionsIndices['contributions'].append({
            "participant": contribution.get_participant().get_id(),
            "genid": rawContrib['id'],
            "ref": rawContrib['parent_id'],
            "timestamp": contribution.get_timestamp().strftime(
                '%Y-%m-%d %H:%M:%S.%f %Z'),
            "text": rawContrib['text'],
            "SOCIAL_KB": socialKB[index],
            "LOCAL_IMPORTANCE": importance[contribution],
        })
    contributionsIndices['total'] = {
        "SOCIAL_KB": sumKB,
        "LOCAL_IMPORTANCE": sumImportance,
    }

    contributionsTexts = [
        contribution.get_raw_contribution()['text']
        for contribution in contributions
    ]
    cnaModels = [{'corpus': model.corpus, 'model': model.type.name.lower()}
                 for model in vectorModels]
    textLabels = ['Utterance', 'Sentence']
    cnaGraph = compute_graph_cscl(texts=contributionsTexts, lang=lang,
                                  models=cnaModels, textLabels=textLabels)

    csclDataDTO = CsclDataDTO(languageString, conceptMaps, csclIndices,
                              csclIndicesDescriptions, participantEvolution,
                              participantInteractionGraph, socialKBResponse,
                              contributionsIndices, cnaGraph)
    csclResponse = CsclResponse(csclDataDTO, "", True)
    try:
        return csclResponse.toJSON()
    except Exception:
        print("Error when serializing")
        raise