def requirementAnalysis(fileArchimate=None):
    """Collect noun-like words from the model's Requirement nodes and chunk them.

    The Archimate model is loaded through ``ArchiLib`` and every node of type
    ``archimate:Requirement`` is visited.  Each node yields a ``Document<n>``
    concept with the node text attached as a ``Sentence<n>`` child.  Tokens of
    the text that are longer than one character and whose POS tag starts with
    ``N`` are attached below the sentence as ``Word`` concepts, each carrying
    its tag as a ``POS`` child.  The resulting concepts are saved to
    ``fileConceptsRequirements`` and then handed to ``Chunks.createChunks``.

    :param fileArchimate: path of the Archimate model; when ``None`` the
        hard-coded default model path below is used.
    :return: None
    """
    if fileArchimate is None:
        fileArchimate = u"/Users/morrj140/Documents/SolutionEngineering/Archimate Models/DVC v38.archimate"

    al = ArchiLib(fileArchimate)

    conceptsFile = fileConceptsRequirements

    requirementNodes = al.getTypeNodes([u"archimate:Requirement"])

    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")

    for index, sentence in enumerate(requirementNodes, 1):
        logger.debug(u"%s" % sentence)

        suffix = str(index)
        document = concepts.addConceptKeyType(u"Document" + suffix, u"Document")
        sentenceConcept = document.addConceptKeyType(sentence, u"Sentence" + suffix)

        # The Document/Sentence concepts are recorded even for a None entry;
        # only the word extraction is skipped.
        if sentence is None:
            continue

        keptWords = [token for token in sentence.split(u" ") if token not in stop]
        cleanSentence = ' '.join(keptWords)

        tokens = nltk.wordpunct_tokenize(cleanSentence)
        for word, pos in nltk.pos_tag(tokens):
            if len(word) <= 1 or pos[0] != u"N":
                continue
            wordConcept = sentenceConcept.addConceptKeyType(word, u"Word")
            wordConcept.addConceptKeyType(pos, u"POS")

    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)

    Chunks(concepts).createChunks()
def test_RequirementAnalysis(cleandir, fileArchimate):
    """Check that requirement analysis produces the concepts and chunks files.

    Builds ``Document<n>`` / ``Sentence<n>`` concepts from every
    ``archimate:Requirement`` node of the model, attaches the tokens whose POS
    tag starts with ``N`` (and that are longer than one character) as ``Word``
    concepts with a ``POS`` child, saves them to ``fileConceptsRequirements``
    and runs ``Chunks.createChunks``.  The test passes when both
    ``fileConceptsRequirements`` and ``fileConceptsChunks`` exist afterwards.

    :param cleandir: fixture; not referenced in the body.
    :param fileArchimate: path of the Archimate model to analyse.
    """
    # NOTE(review): this precondition checks filePPTXIn, which is not used
    # anywhere else in this test -- confirm it should not be fileArchimate.
    assert os.path.isfile(filePPTXIn)

    al = ArchiLib(fileArchimate)

    conceptsFile = fileConceptsRequirements

    nl = al.getTypeNodes([u"archimate:Requirement"])

    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")

    for n, sentence in enumerate(nl, 1):
        logger.debug(u"%s" % sentence)

        # str() instead of the Python-2-only unicode(): concatenating with a
        # u"" literal yields the same key on Python 2 and it matches
        # requirementAnalysis / gapSimilarity, while also working on Python 3.
        c = concepts.addConceptKeyType(u"Document" + str(n), u"Document")
        d = c.addConceptKeyType(sentence, u"Sentence" + str(n))

        # The Document/Sentence concepts are still recorded for a None entry;
        # only the word extraction is skipped.
        if sentence is None:
            continue

        cleanSentence = u' '.join(
            [word for word in sentence.split(u" ") if word not in stop])

        for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
            if len(word) > 1 and pos[0] == u"N":
                e = d.addConceptKeyType(word, u"Word")
                e.addConceptKeyType(pos, u"POS")

    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)

    assert os.path.isfile(conceptsFile)

    chunks = Chunks(concepts)
    chunks.createChunks()

    assert os.path.isfile(fileConceptsChunks)
def gapSimilarity(fileArchimate, searchTypes):
    """Build topics from model nodes and write them, sorted by count, to disk.

    Steps, in order:

    1. Load ``fileArchimate`` through ``ArchiLib`` and fetch the nodes
       matching ``searchTypes`` via ``getTypeNodes``.
    2. For every non-None node name create a ``Document<n>`` concept with the
       name as a child (both typed with the node's ``ARCHI_TYPE`` value), and
       below it one ``LemmaWord`` concept per token that is longer than one
       character and whose POS tag starts with ``N``; each carries its tag as
       a ``POS`` child.
    3. Run ``Collocations.find_collocations`` on the concepts.
    4. Create topics and similarities with ``DocumentsSimilarity``.
    5. Log every topic and write them to ``topic_sort.txt`` (current working
       directory), ordered by ascending absolute count.

    :param fileArchimate: path of the Archimate model to load.
    :param searchTypes: passed unchanged to ``ArchiLib.getTypeNodes``.
    :return: None
    """
    # Local import: only needed for the encoding-aware report file below.
    import io

    lemmatizer = WordNetLemmatizer()

    logger.info(u"Using : %s", fileArchimate)

    al = ArchiLib(fileArchimate)

    nl = al.getTypeNodes(searchTypes)

    logger.info(u"Find Words...")
    concepts = Concepts(u"Word", u"Topic")

    # The counter advances for None entries too, so document numbering stays
    # aligned with the position of the node in ``nl``.
    for n, sentence in enumerate(nl, 1):
        if sentence is None:
            continue

        logger.info(u"%s", sentence)

        nodeType = nl[sentence][ARCHI_TYPE]
        c = concepts.addConceptKeyType(u"Document" + str(n), nodeType)
        d = c.addConceptKeyType(sentence, nodeType)

        cleanSentence = u' '.join(
            [word for word in sentence.split(u" ") if word not in stop])

        for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
            if len(word) > 1 and pos[0] == u"N":
                lemmaWord = lemmatizer.lemmatize(word.lower())
                e = d.addConceptKeyType(lemmaWord, u"LemmaWord")
                e.addConceptKeyType(pos, u"POS")

    logger.info(u"Find Collocations...")
    fc = Collocations()
    fc.find_collocations(concepts)

    npbt = DocumentsSimilarity(al)

    logger.info(u"Create Topics")
    npbt.createTopics(concepts)

    logger.info(u"Find Similarities")
    # The return value was never used; the call is kept because it may have
    # side effects on ``npbt``.
    npbt.findSimilarties()

    logger.debug(u"Topics")
    listTopics = list()
    for topic in npbt.topicConcepts.getConcepts().values():
        logger.info(u"%s[%d]" % (topic.name, topic.count))
        listTopics.append((topic.name, topic.count))

    logger.info(u"Topics Sorted")

    # Text mode with an explicit encoding: the old "wb" handle implicitly
    # ASCII-encoded the text on Python 2 (failing on non-ASCII topic names)
    # and rejects str outright on Python 3.  Writing u"\n" in text mode still
    # produces the platform line separator on disk.
    with io.open(u"topic_sort.txt", "w", encoding="utf-8") as report:
        for name, count in sorted(listTopics, key=lambda topic: abs(topic[1])):
            output = u"Topic : %s[%d]" % (name, count)
            logger.info(output)
            report.write(output + u"\n")