import os

import nltk
from nltk.stem import WordNetLemmatizer

# Project-local helpers (ArchiLib, Concepts, Chunks, Collocations,
# DocumentsSimilarity, the stop-word list `stop`, the module `logger`,
# and file-path constants such as fileConceptsRequirements) are assumed
# to be imported from the surrounding package.


def requirementAnalysis(fileArchimate=None):

    if fileArchimate is None:
        fileArchimate = u"/Users/morrj140/Documents/SolutionEngineering/Archimate Models/DVC v38.archimate"

    al = ArchiLib(fileArchimate)

    conceptsFile = fileConceptsRequirements

    searchTypes = list()
    searchTypes.append(u"archimate:Requirement")
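    # Collect all model elements of the requested type(s), keyed by their text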
    nl = al.getTypeNodes(searchTypes)

    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")
    n = 0
    for sentence in nl:
        n += 1
        if sentence is None:
            continue
        logger.debug(u"%s" % sentence)

        c = concepts.addConceptKeyType(u"Document" + str(n), u"Document")
        d = c.addConceptKeyType(sentence, u"Sentence" + str(n))

        # Drop stop words, then keep only nouns (POS tags starting with "N")
        cleanSentence = u' '.join([word for word in sentence.split(u" ") if word not in stop])
        for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
            if len(word) > 1 and pos[0] == u"N":
                e = d.addConceptKeyType(word, u"Word")
                e.addConceptKeyType(pos, u"POS")

    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)

    chunks = Chunks(concepts)
    chunks.createChunks()
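

# Minimal usage sketch, assuming logging is configured and the NLTK
# tokenizer/tagger data is installed; the model path below is an example,
# not a file from this repo:
#
#     requirementAnalysis(u"/path/to/model.archimate")
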
def test_RequirementAnalysis(cleandir, fileArchimate):

    assert os.path.isfile(fileArchimate)

    al = ArchiLib(fileArchimate)

    conceptsFile = fileConceptsRequirements

    searchTypes = list()
    searchTypes.append(u"archimate:Requirement")
    nl = al.getTypeNodes(searchTypes)

    logger.info(u"Find Words in Requirements...")
    concepts = Concepts(u"Requirement", u"Requirements")
    n = 0
    for sentence in nl:
        n += 1
        if sentence is None:
            continue
        logger.debug(u"%s" % sentence)

        c = concepts.addConceptKeyType(u"Document" + unicode(n), u"Document")
        d = c.addConceptKeyType(sentence, u"Sentence" + unicode(n))

        # Drop stop words, then keep only nouns (POS tags starting with "N")
        cleanSentence = u' '.join([word for word in sentence.split(u" ") if word not in stop])
        for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
            if len(word) > 1 and pos[0] == u"N":
                e = d.addConceptKeyType(word, u"Word")
                e.addConceptKeyType(pos, u"POS")

    Concepts.saveConcepts(concepts, conceptsFile)
    logger.info(u"Saved : %s" % conceptsFile)

    assert os.path.isfile(conceptsFile)

    chunks = Chunks(concepts)
    chunks.createChunks()

    assert os.path.isfile(fileConceptsChunks)
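

# The test above is assumed to run under pytest, with `cleandir` and
# `fileArchimate` supplied as fixtures (e.g. from a conftest.py):
#
#     pytest -k test_RequirementAnalysis
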
def gapSimilarity(fileArchimate, searchTypes):

    lemmatizer = WordNetLemmatizer()

    logger.info(u"Using : %s" % fileArchimate)

    al = ArchiLib(fileArchimate)

    nl = al.getTypeNodes(searchTypes)

    logger.info(u"Find Words...")
    concepts = Concepts(u"Word", u"Topic")

    n = 0
    for sentence in nl:
        n += 1

        if sentence is None:
            continue

        logger.info(u"%s" % sentence)

        c = concepts.addConceptKeyType(u"Document" + str(n), nl[sentence][ARCHI_TYPE])
        d = c.addConceptKeyType(sentence, nl[sentence][ARCHI_TYPE])

        # Drop stop words, keep nouns, and normalise them to WordNet lemmas
        cleanSentence = u' '.join([word for word in sentence.split(u" ") if word not in stop])
        for word, pos in nltk.pos_tag(nltk.wordpunct_tokenize(cleanSentence)):
            if len(word) > 1 and pos[0] == u"N":
                lemmaWord = lemmatizer.lemmatize(word.lower())
                e = d.addConceptKeyType(lemmaWord, u"LemmaWord")
                e.addConceptKeyType(pos, u"POS")

    # concepts.logConcepts()  # uncomment to dump every concept while debugging

    logger.info(u"Find Collocations...")
    fc = Collocations()
    fc.find_collocations(concepts)

    npbt = DocumentsSimilarity(al)

    logger.info(u"Create Topics")
    npbt.createTopics(concepts)

    logger.info(u"Find Similarities")
    npbt.findSimilarties()

    logger.debug(u"Topics")
    listTopics = list()
    for x in npbt.topicConcepts.getConcepts().values():
        logger.info(u"%s[%d]" % (x.name, x.count))
        listTopics.append((x.name, x.count))

    logger.info(u"Topics Sorted")
    # Sort ascending by topic count; encode explicitly since names may be unicode
    with open(u"topic_sort.txt", "wb") as f:
        for name, count in sorted(listTopics, key=lambda t: abs(t[1])):
            output = u"Topic : %s[%d]" % (name, count)
            logger.info(output)
            f.write((output + os.linesep).encode("utf-8"))
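

# Minimal usage sketch (the model path and search types are illustrative):
#
#     gapSimilarity(u"/path/to/model.archimate", [u"archimate:Requirement"])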