def MarkovGeneration(self, srcText='', dictManager=None):
    """Generate text from ``srcText`` by walking a Markov-chain dictionary.

    Punctuation tokens (pymorphy tag 'PNCT') are copied through verbatim
    and become the new chain state; other tokens advance the chain.

    :param srcText: source text driving the generation
    :param dictManager: DictionaryManager holding the chain; a fresh one is
        created per call when omitted.  BUG FIX: the original used
        ``dictManager=DictionaryManager()`` as the default, which is
        evaluated once at definition time and shared across all calls.
    :return: the generated text (also stored on ``self.__text``)
    """
    if dictManager is None:
        dictManager = DictionaryManager()
    self.__text = ''
    morph = pm.MorphAnalyzer()
    tokens = self.__CreateStructure(srcText)
    dictionary = dictManager.Dictionary()
    curWord = dictManager.StartKey()
    previousWord = dictManager.StartKey()
    for word in tokens:
        if str(morph.parse(word)[0].tag) == 'PNCT':
            # Punctuation is emitted as-is and also becomes the chain state.
            self.__text += word + ' '
            previousWord = word
            curWord = word
        else:
            wordList = self.__WordSelection(dictionary[curWord])
            if not wordList:
                # BUG FIX: the original spun forever in a
                # `while len(wordList) == 0` loop here, because after the
                # first iteration curWord/previousWord never changed again.
                # Fall back to the previous state once; if that is also a
                # dead end, restart from the start key.
                curWord = previousWord
                wordList = self.__WordSelection(dictionary[curWord])
                if not wordList:
                    curWord = dictManager.StartKey()
                    wordList = self.__WordSelection(dictionary[curWord])
            previousWord = curWord
            curWord = random.choice(wordList)
            if curWord == dictManager.EndKey():
                # End-of-chain sentinel: restart from the beginning without
                # emitting the sentinel itself.
                curWord = dictManager.StartKey()
                previousWord = dictManager.StartKey()
            else:
                self.__text += curWord + ' '
    return self.__text
def __init__(self, key):
    """Constructor for a client object.

    :param key: API key forwarded to every resource manager
    """
    self.key = key
    # One manager per API resource, all sharing the same key.
    self.dictionaryManager = DictionaryManager(key)
    self.entityManager = EntityManager(key)
    self.conceptManager = ConceptManager(key)
    self.modelManager = ModelManager(key)
    self.categoryManager = CategoryManager(key)
    # Default request parameters for /semantic_tagging.
    self.fields, self.filter_data = '', 'y'
    # Default request parameters for /check.
    self.mode, self.group_errors, self.check_spacing = 'all', '2', 'n'
def MarkovGeneration(self, srcText='', srcTextTitle='text', dictionary=None):
    """Generate a Markov-chain text from ``srcText`` and save it under ``generated/``.

    :param srcText: source text driving the generation
    :param srcTextTitle: file title used when saving the generated text
    :param dictionary: DictionaryManager holding the chain; a fresh one is
        created per call when omitted.  BUG FIX: the original default
        ``DictionaryManager()`` was evaluated once at definition time and
        shared across all calls.
    """
    if dictionary is None:
        dictionary = DictionaryManager()
    genDocument = TextGenerator()
    genDocument.MarkovGeneration(srcText, dictionary)
    # Idempotent and race-free, unlike the original exists()-then-mkdir().
    os.makedirs('generated', exist_ok=True)
    genDocument.SaveGenText('generated/', srcTextTitle)
def __init__(self, editor):
    """Wire the word-assist machinery to ``editor`` and its text buffer."""
    self.editor = editor
    self.signals = Signals()
    self.triggers = TriggerManager(editor)
    connect_all(self, self.signals, self.triggers,
                textbuffer=self.editor.textbuffer)
    # Transient word-tracking state, reset as the user types.
    self.block_word_reset = False
    self.words = None
    self.start_word = None
    self.start_offset = None
    # Helper components; all report back through the shared signal sender.
    sender = self.signals.sender
    self.indexer = IndexerProcessManager(sender, editor)
    self.dictionary_manager = DictionaryManager(sender, editor)
    self.communicator = ProcessCommunicator(sender, editor)
    self.extractor = TextExtractor(sender, editor)
    self.buffer_monitor = BufferMonitor(sender, editor)
def __init__(self, key):
    """Build a client object holding one manager per API resource.

    :param key: API key shared by all managers
    """
    self.key = key
    self.dictionaryManager = DictionaryManager(key)
    self.entityManager = EntityManager(key)
    self.conceptManager = ConceptManager(key)
    self.modelManager = ModelManager(key)
    self.categoryManager = CategoryManager(key)
    # /semantic_tagging defaults
    self.fields = ''
    self.filter_data = 'y'
    # /check defaults
    self.mode = 'all'
    self.group_errors = '2'
    self.check_spacing = 'n'
def __init__(self):
    """Precompute the 26 cumulative Caesar shift tables.

    ``one_shift`` maps every uppercase letter to its successor (Z wraps
    to A); composing it with itself repeatedly yields shifts by
    2, 3, ..., 27 (27 == 1 mod 26), one table appended per iteration.
    """
    self.dm = DictionaryManager()
    # Identical mapping to the original hand-written literal, generated
    # arithmetically instead.
    one_shift = {chr(ord('A') + i): chr(ord('A') + (i + 1) % 26)
                 for i in range(26)}
    self.letter_shifts = []
    cur_shift = one_shift.copy()
    for _ in range(26):
        cur_shift = {src: one_shift[dst] for src, dst in cur_shift.items()}
        self.letter_shifts.append(cur_shift)
class SemPubClient:
    """Client for the Semantic Publishing REST services.

    Wraps the semantic tagging (/semantic_tagging) and proofreading
    (/check) endpoints, plus CRUD helpers for dictionaries, entities,
    concepts, models and categories.
    """

    def __init__(self, key):
        """Constructor for a client object.

        :param key: API key used to authenticate every request
        """
        self.key = key
        self.dictionaryManager = DictionaryManager(key)
        self.entityManager = EntityManager(key)
        self.conceptManager = ConceptManager(key)
        self.modelManager = ModelManager(key)
        self.categoryManager = CategoryManager(key)
        # Default parameter values for /semantic_tagging
        self.fields = ''
        self.filter_data = 'y'
        # Default parameter values for /check
        self.mode = 'all'
        self.group_errors = '2'
        self.check_spacing = 'n'

    # Setters for configuration parameters
    def setAnalysisFields(self, fields):
        self.fields = fields

    def setAnalysisFilterData(self, filter_data):
        self.filter_data = filter_data

    def setCheckMode(self, mode):
        # BUG FIX: this previously assigned to self.model, so the mode
        # actually sent to /check was never updated.
        self.mode = mode

    def setCheckGroupErrors(self, group_errors):
        self.group_errors = group_errors

    def setCheckSpacing(self, check_spacing):
        self.check_spacing = check_spacing

    def __parseResponse(self, response):
        """Return the service 'result' payload or raise SemPubException on error."""
        r = response.json()
        if response.status_code == requests.codes.ok:
            return r['result']
        raise SemPubException(response.status_code, r['status'])

    # Semantic tagging service operations
    def analyzeDocument(self, document, dictionary=None, models=None):
        """Return the document text analyzed with all extracted semantic information.

        Document metadata (language, source, timeref) is taken into
        account to build a more accurate analysis.

        :param document: :class:`Document` to be analyzed
        :param dictionary: optional user-defined :class:`Dictionary` to include for tagging
        :param models: optional list of user-defined :class:`Model` to include for classification
        """
        payload = {
            'key': self.key,
            'doc': str(document),
            'filter_data': self.filter_data,
            'fields': self.fields,
        }
        if dictionary is not None:
            payload['dictionary'] = dictionary['name']
        if models is not None:
            if isinstance(models, list):
                # BUG FIX: the original stored a lazy map object here,
                # which form-encodes incorrectly on Python 3.
                payload['model'] = [m['name'] for m in models]
            else:
                payload['model'] = models['name']
        payload['src'] = 'sdk-python-1.0'
        response = requests.post(TAGGING_SERVICE_ENDPOINT, data=payload)
        return self.__parseResponse(response)

    def analyzeText(self, text, lang, dictionary=None, models=None):
        """Return the text analyzed with all extracted semantic information.

        :param text: text to be analyzed
        :param lang: language of the text
        :param dictionary: optional user-defined :class:`Dictionary` to include for tagging
        :param models: optional list of user-defined :class:`Model` to include for classification
        """
        doc = Document(1, text)
        doc['language'] = lang
        return self.analyzeDocument(doc, dictionary, models)

    # Text proofreading service operations
    def checkDocument(self, document, doc_offset=0, dictionary=None):
        """Return the proofreading issues found in the document text.

        Document metadata (language) is taken into account.

        :param document: :class:`Document` to be analyzed
        :param doc_offset: offset in characters from where to start proofreading
        :param dictionary: optional user-defined :class:`Dictionary` whose
            words are treated as known
        """
        payload = {
            'key': self.key,
            'doc': str(document),
            'doc_offset': doc_offset,
            'mode': self.mode,
            'group_errors': self.group_errors,
            'check_spacing': self.check_spacing,
        }
        if dictionary is not None:
            payload['dictionary'] = dictionary['name']
        payload['src'] = 'sdk-python-1.0'
        response = requests.post(CHECK_SERVICE_ENDPOINT, data=payload)
        return self.__parseResponse(response)

    def checkText(self, text, lang, doc_offset=0, dictionary=None):
        """Return the proofreading issues found in the text.

        :param text: text to be analyzed
        :param lang: language of the text
        :param doc_offset: offset in characters from where to start proofreading
        :param dictionary: optional user-defined :class:`Dictionary` whose
            words are treated as known
        """
        doc = Document(1, text)
        doc['language'] = lang
        return self.checkDocument(doc, doc_offset, dictionary)

    # CRUD operations on Dictionary
    def getDictionaryList(self, query, lang):
        """List user-defined dictionaries.

        :param query: regular expression to filter dictionaries
        :param lang: filter dictionaries in this language; use 'all' for multilingual
        """
        return self.dictionaryManager.getList(query, lang)

    def createDictionary(self, dictionary):
        return self.dictionaryManager.create(dictionary)

    def readDictionary(self, name):
        return self.dictionaryManager.read(name)

    def updateDictionary(self, dictionary):
        return self.dictionaryManager.update(dictionary)

    def deleteDictionary(self, dictionary):
        """Delete a dictionary given either its name or a Dictionary object.

        BUG FIX: the original defined deleteDictionary twice, so the
        name-based overload was silently shadowed; this single method
        supports both call styles.
        """
        name = dictionary.getId() if hasattr(dictionary, 'getId') else dictionary
        return self.dictionaryManager.delete(name)

    # CRUD operations on Entity
    def getEntityList(self, dictionary, query):
        """List entities (:class:`Entity`) in the dictionary matching the query.

        :param dictionary: a :class:`Dictionary` object
        :param query: a regular expression
        """
        return self.entityManager.getList(dictionary.getId(), query)

    def createEntity(self, entity, dictionary):
        return self.entityManager.create(entity, dictionary.getId())

    def readEntity(self, id, dictionary):
        return self.entityManager.read(id, dictionary.getId())

    def updateEntity(self, entity, dictionary):
        return self.entityManager.update(entity, dictionary.getId())

    def deleteEntity(self, entity, dictionary):
        """Delete an entity given either its id or an Entity object
        (originally two shadowing definitions, now merged)."""
        entity_id = entity.getId() if hasattr(entity, 'getId') else entity
        return self.entityManager.delete(entity_id, dictionary.getId())

    # CRUD operations on Concept
    def getConceptList(self, dictionary, query):
        """List concepts (:class:`Concept`) in the dictionary matching the query.

        :param dictionary: a :class:`Dictionary` object
        :param query: a regular expression
        """
        return self.conceptManager.getList(dictionary.getId(), query)

    def createConcept(self, concept, dictionary):
        return self.conceptManager.create(concept, dictionary.getId())

    def readConcept(self, id, dictionary):
        return self.conceptManager.read(id, dictionary.getId())

    def updateConcept(self, concept, dictionary):
        return self.conceptManager.update(concept, dictionary.getId())

    def deleteConcept(self, concept, dictionary):
        """Delete a concept given either its id or a Concept object
        (originally two shadowing definitions, now merged)."""
        concept_id = concept.getId() if hasattr(concept, 'getId') else concept
        return self.conceptManager.delete(concept_id, dictionary.getId())

    # CRUD operations on Model
    def getModelList(self, query, lang):
        """List user-defined models.

        :param query: regular expression to filter models
        :param lang: filter models in this language
        """
        return self.modelManager.getList(query, lang)

    def createModel(self, model):
        return self.modelManager.create(model)

    def readModel(self, name):
        return self.modelManager.read(name)

    def updateModel(self, model):
        return self.modelManager.update(model)

    def deleteModel(self, model):
        """Delete a model given either its name or a Model object
        (originally two shadowing definitions, now merged)."""
        name = model.getId() if hasattr(model, 'getId') else model
        return self.modelManager.delete(name)

    # CRUD operations on Category
    def getCategoryList(self, model, query):
        """List categories (:class:`Category`) in the model matching the query.

        :param model: a :class:`Model` object
        :param query: a regular expression
        """
        return self.categoryManager.getList(model.getId(), query)

    def createCategory(self, category, model):
        return self.categoryManager.create(category, model.getId())

    def readCategory(self, id, model):
        return self.categoryManager.read(id, model.getId())

    def updateCategory(self, category, model):
        return self.categoryManager.update(category, model.getId())

    def deleteCategory(self, category, model):
        """Delete a category given either its id or a Category object
        (originally two shadowing definitions, now merged)."""
        category_id = category.getId() if hasattr(category, 'getId') else category
        return self.categoryManager.delete(category_id, model.getId())
class CaesarCode(object): def __init__(self): self.dm = DictionaryManager() self.letter_shifts = list() one_shift = {'A': 'B', 'B': 'C', 'C': 'D', 'D': 'E', 'E': 'F', 'F': 'G', 'G': 'H', 'H': 'I', 'I': 'J', 'J': 'K', 'K': 'L', 'L': 'M', 'M': 'N', 'N': 'O', 'O': 'P', 'P': 'Q', 'Q': 'R', 'R': 'S', 'S': 'T', 'T': 'U', 'U': 'V', 'V': 'W', 'W': 'X', 'X': 'Y', 'Y': 'Z', 'Z': 'A' } cur_shift = one_shift.copy() for _ in range(26): next_shift = cur_shift.copy() for k, v in cur_shift.items(): next_shift[k] = one_shift[v] cur_shift = next_shift self.letter_shifts.append(cur_shift) # common_letters = ['A', 'E', 'I', 'O', 'U'] # for h in common_letters: # for w in common_letters: # if w is not h: # for e in common_letters: # if e is not h and e is not w: # self.letter_shifts.append({'H': h, # 'W': w, # 'E': e}) # self.letter_shifts = list() # self.letter_shifts.append({ # 'L': 'N', # 'W': 'A' # }) # self.letter_shifts = self.make_shifts_01() def make_shifts_01(self): shifts = list() wwwword_list = ['AANVAARDBAAR', 'AANVECHTBAAR', 'BANANENVLAAI', 'CENTRIFUGAAL', 'CENTRIPETAAL', 'CENTRIPETAAT', 'CONGLOMERAAT', 'CONJECTURAAL', 'CONTACTDRAAD', 'CONTINENTAAL', 'CONTUBERNAAL', 'DONDERSTRAAL', 'EENRODEDRAAD', 'EENHANDSZAAG', 'EENHEIDSMAAT', 'EINDKAPITAAL', 'EINDKWARTAAL', 'FANTASIENAAM', 'GANGSTERBAAS', 'GENIESOLDAAT', 'HANENGEKRAAI', 'HONDSBRUTAAL', 'KANTTEKENAAR', 'KINDERSCHAAR', 'KONINGSKRAAI', 'KONINGSZWAAN', 'KUNSTIJSBAAN', 'KUNSTMINNAAR', 'LANDEIGENAAR', 'LANDINGSBAAN', 'LANDSDIENAAR', 'LANTAARNHAAI', 'LANTAARNHAAK', 'LANTAARNPAAL', 'LANTARENPAAL', 'LENTEVERMAAK', 'LINGERIEZAAK', 'LONGITUDIAAL', 'MENGAPPARAAT', 'MINISTERRAAD', 'MONDVOORRAAD', 'MONNIKENBAAI', 'MONNIKSCHAAP', 'NONSENSICAAL', 'NONSENSIKAAL', 'ONNAVOLGBAAR', 'ORNITHOGRAAF', 'PANAMAKANAAL', 'PENNINGPLAAT', 'PONSAPPARAAT', 'SINTNICOLAAS', 'TENNISLERAAR', 'TENONDERGAAN', 'TONEELGEBAAR', 'VANIEPENDAAL', 'VANILLESMAAK', 'VENSTERSTAAF', 'VENSTERVRAAT', 'VINGERSPRAAK', 'WANDELSTRAAT', 'WANWIJNSMAAK', 'WINKELSTRAAT', 
'ZANGPAPEGAAI', 'ZINKCHROMAAT', 'ZINKMINERAAL', 'ZONRESULTAAT', 'HEKSENGELOOF', 'KOKELEKONOOT', 'TEKENPOTLOOD'] for word in wwwword_list: shift = dict() for src_letter, dst_letter in zip('EILXPAYODWWH', word): shift[src_letter] = dst_letter shifts.append(shift) return shifts def caesar_code(self, input_text): input_text = input_text.upper() for letter_shift in self.letter_shifts: text = ''.join([letter_shift[x] if x in letter_shift else x.lower() for x in input_text]) pprint(text) def print_letter_permutations(self, input_text): '''Print the input text with all possible letter permutations.''' letters_sorted = self._sort_letters(input_text) all_letters = self._sorted_letters_by_frequency() for i in range(6, 6 + 1): src_letters = letters_sorted[:i] dst_letter_max = 26 dst_letter_permutations = itertools.permutations(all_letters[:dst_letter_max], i) for dst_letters in dst_letter_permutations: shift = dict(zip(src_letters, dst_letters)) permutated_text = self._perform_permutation(input_text, shift) print(permutated_text) def determine_letter_permutation(self, input_text, allow_double_letters): '''Determine the permutation that results in a valid text''' self.input_text = ''.join(x for x in input_text if x in string.ascii_letters or x == ' ') self.letters_sorted = self.sort_letters(input_text) print(self.letters_sorted) self.all_letters = self._sorted_letters_by_frequency() letter_shift = dict(zip(self.all_letters, ['.'] * len(self.all_letters))) r = self.add_letter_to_letter_shift(letter_shift, index=0, allow_double_letters=allow_double_letters) if r[0]: result = dict() for k, v in r[0].items(): if v != '.': result[k] = v print(result) output_text = self._perform_permutation(input_text, result) print(output_text) def add_letter_to_letter_shift(self, letter_shift, index, allow_double_letters): for letter_dst in self.all_letters: if not allow_double_letters and letter_dst in letter_shift.values(): continue letter_shift[self.letters_sorted[index]] = letter_dst text = 
self._perform_permutation(self.input_text, letter_shift) print(text) valid_text = True for word in text.split(): if not self.dm.words_with_pattern(word, exists=True): valid_text = False # print('Word {} is invalid'.format(word)) break if valid_text: if index < len(self.letters_sorted) - 1: letter_shift, ready = self.add_letter_to_letter_shift(letter_shift, index + 1, allow_double_letters) if ready: return (letter_shift, True) else: return (letter_shift, True) else: continue letter_shift[self.letters_sorted[index]] = '.' return (letter_shift, False) def count_letters(self, input_text): '''Return a letter count for an input text.''' count_dict = {} for x in string.ascii_uppercase: count_dict[x] = input_text.count(x) return count_dict def sort_letters(self, input_text): letter_count = self.count_letters(input_text) letters = list() counts = list() for k, v in letter_count.items(): if v > 0: letters.append(k) counts.append(v) letters_sorted = [i[0] for i in sorted(zip(letters, counts), key=lambda l: l[1], reverse=True)] return letters_sorted def _sorted_letters_by_frequency(self): '''From https://onzetaal.nl/taaladvies/advies/letterfrequentie-in-het-nederlands''' return ['E', 'N', 'A', 'T', 'I', 'R', 'O', 'D', 'S', 'L', 'G', 'V', 'H', 'K', 'M', 'U', 'B', 'P', 'W', 'J', 'Z', 'C', 'F', 'X', 'Y', 'Q'] def _perform_permutation(self, input_text, letter_shift): input_text = input_text.upper() text = ''.join([letter_shift[x] if x in letter_shift else x.lower() for x in input_text]) return text
class SemPubClient:
    """Client for the Semantic Publishing REST services.

    Wraps the semantic tagging (/semantic_tagging) and proofreading
    (/check) endpoints, plus CRUD helpers for dictionaries, entities,
    concepts, models and categories.
    """

    def __init__(self, key):
        """Constructor for a client object.

        :param key: API key used to authenticate every request
        """
        self.key = key
        self.dictionaryManager = DictionaryManager(key)
        self.entityManager = EntityManager(key)
        self.conceptManager = ConceptManager(key)
        self.modelManager = ModelManager(key)
        self.categoryManager = CategoryManager(key)
        # Default parameter values for /semantic_tagging
        self.fields = ''
        self.filter_data = 'y'
        # Default parameter values for /check
        self.mode = 'all'
        self.group_errors = '2'
        self.check_spacing = 'n'

    # Setters for configuration parameters
    def setAnalysisFields(self, fields):
        self.fields = fields

    def setAnalysisFilterData(self, filter_data):
        self.filter_data = filter_data

    def setCheckMode(self, mode):
        # BUG FIX: this previously assigned to self.model, so the mode
        # actually sent to /check was never updated.
        self.mode = mode

    def setCheckGroupErrors(self, group_errors):
        self.group_errors = group_errors

    def setCheckSpacing(self, check_spacing):
        self.check_spacing = check_spacing

    def __parseResponse(self, response):
        """Return the service 'result' payload or raise SemPubException on error."""
        r = response.json()
        if response.status_code == requests.codes.ok:
            return r['result']
        raise SemPubException(response.status_code, r['status'])

    # Semantic tagging services
    def analyzeDocument(self, document, dictionary=None, models=None):
        """Return the document text analyzed with all extracted semantic information.

        Document metadata (language, source, timeref) is taken into
        account to build a more accurate analysis.

        :param document: :class:`Document` to be analyzed
        :param dictionary: optional user-defined :class:`Dictionary` to include for tagging
        :param models: optional list of user-defined :class:`Model` to include for classification
        """
        payload = {
            'key': self.key,
            'doc': str(document),
            'filter_data': self.filter_data,
            'fields': self.fields,
        }
        if dictionary is not None:
            payload['dictionary'] = dictionary['name']
        if models is not None:
            if isinstance(models, list):
                # BUG FIX: the original stored a lazy map object here,
                # which form-encodes incorrectly on Python 3.
                payload['model'] = [m['name'] for m in models]
            else:
                payload['model'] = models['name']
        response = requests.post(TAGGING_SERVICE_ENDPOINT, data=payload)
        return self.__parseResponse(response)

    def analyzeText(self, text, lang, dictionary=None, models=None):
        """Return the text analyzed with all extracted semantic information.

        :param text: text to be analyzed
        :param lang: language of the text
        :param dictionary: optional user-defined :class:`Dictionary` to include for tagging
        :param models: optional list of user-defined :class:`Model` to include for classification
        """
        doc = Document(1, text)
        doc['language'] = lang
        return self.analyzeDocument(doc, dictionary, models)

    # Text proofreading services
    def checkDocument(self, document, doc_offset=0, dictionary=None):
        """Return the proofreading issues found in the document text.

        Document metadata (language) is taken into account.

        :param document: :class:`Document` to be analyzed
        :param doc_offset: offset in characters from where to start proofreading
        :param dictionary: optional user-defined :class:`Dictionary` whose
            words are treated as known
        """
        payload = {
            'key': self.key,
            'doc': str(document),
            'doc_offset': doc_offset,
            'mode': self.mode,
            'group_errors': self.group_errors,
            'check_spacing': self.check_spacing,
        }
        if dictionary is not None:
            payload['dictionary'] = dictionary['name']
        response = requests.post(CHECK_SERVICE_ENDPOINT, data=payload)
        return self.__parseResponse(response)

    def checkText(self, text, lang, doc_offset=0, dictionary=None):
        """Return the proofreading issues found in the text.

        :param text: text to be analyzed
        :param lang: language of the text
        :param doc_offset: offset in characters from where to start proofreading
        :param dictionary: optional user-defined :class:`Dictionary` whose
            words are treated as known
        """
        doc = Document(1, text)
        doc['language'] = lang
        return self.checkDocument(doc, doc_offset, dictionary)

    # CRUD operations on Dictionary
    def getDictionaryList(self, query, lang):
        """List user-defined dictionaries.

        :param query: regular expression to filter dictionaries
        :param lang: filter dictionaries in this language; use 'all' for multilingual
        """
        return self.dictionaryManager.getList(query, lang)

    def createDictionary(self, dictionary):
        return self.dictionaryManager.create(dictionary)

    def readDictionary(self, name):
        return self.dictionaryManager.read(name)

    def updateDictionary(self, dictionary):
        return self.dictionaryManager.update(dictionary)

    def deleteDictionary(self, dictionary):
        """Delete a dictionary given either its name or a Dictionary object.

        BUG FIX: the original defined deleteDictionary twice, so the
        name-based overload was silently shadowed; this single method
        supports both call styles.
        """
        name = dictionary.getId() if hasattr(dictionary, 'getId') else dictionary
        return self.dictionaryManager.delete(name)

    # CRUD operations on Entity
    def getEntityList(self, dictionary, query):
        """List entities (:class:`Entity`) in the dictionary matching the query.

        :param dictionary: a :class:`Dictionary` object
        :param query: a regular expression
        """
        return self.entityManager.getList(dictionary.getId(), query)

    def createEntity(self, entity, dictionary):
        return self.entityManager.create(entity, dictionary.getId())

    def readEntity(self, id, dictionary):
        return self.entityManager.read(id, dictionary.getId())

    def updateEntity(self, entity, dictionary):
        return self.entityManager.update(entity, dictionary.getId())

    def deleteEntity(self, entity, dictionary):
        """Delete an entity given either its id or an Entity object
        (originally two shadowing definitions, now merged)."""
        entity_id = entity.getId() if hasattr(entity, 'getId') else entity
        return self.entityManager.delete(entity_id, dictionary.getId())

    # CRUD operations on Concept
    def getConceptList(self, dictionary, query):
        """List concepts (:class:`Concept`) in the dictionary matching the query.

        :param dictionary: a :class:`Dictionary` object
        :param query: a regular expression
        """
        return self.conceptManager.getList(dictionary.getId(), query)

    def createConcept(self, concept, dictionary):
        return self.conceptManager.create(concept, dictionary.getId())

    def readConcept(self, id, dictionary):
        return self.conceptManager.read(id, dictionary.getId())

    def updateConcept(self, concept, dictionary):
        return self.conceptManager.update(concept, dictionary.getId())

    def deleteConcept(self, concept, dictionary):
        """Delete a concept given either its id or a Concept object
        (originally two shadowing definitions, now merged)."""
        concept_id = concept.getId() if hasattr(concept, 'getId') else concept
        return self.conceptManager.delete(concept_id, dictionary.getId())

    # CRUD operations on Model
    def getModelList(self, query, lang):
        """List user-defined models.

        :param query: regular expression to filter models
        :param lang: filter models in this language
        """
        return self.modelManager.getList(query, lang)

    def createModel(self, model):
        return self.modelManager.create(model)

    def readModel(self, name):
        return self.modelManager.read(name)

    def updateModel(self, model):
        return self.modelManager.update(model)

    def deleteModel(self, model):
        """Delete a model given either its name or a Model object
        (originally two shadowing definitions, now merged)."""
        name = model.getId() if hasattr(model, 'getId') else model
        return self.modelManager.delete(name)

    # CRUD operations on Category
    def getCategoryList(self, model, query):
        """List categories (:class:`Category`) in the model matching the query.

        :param model: a :class:`Model` object
        :param query: a regular expression
        """
        return self.categoryManager.getList(model.getId(), query)

    def createCategory(self, category, model):
        return self.categoryManager.create(category, model.getId())

    def readCategory(self, id, model):
        return self.categoryManager.read(id, model.getId())

    def updateCategory(self, category, model):
        return self.categoryManager.update(category, model.getId())

    def deleteCategory(self, category, model):
        """Delete a category given either its id or a Category object
        (originally two shadowing definitions, now merged)."""
        category_id = category.getId() if hasattr(category, 'getId') else category
        return self.categoryManager.delete(category_id, model.getId())