def build_ngram_counts(inputtext=None, n=1, countspace=False, countpunctuation=False):
    """
    Count the character n-grams that occur in a piece of text.

    A character takes part in an n-gram when it is alphabetic, when it is a
    space and ``countspace`` is ``True``, or when ``ispunct`` accepts it, it is
    not a newline and ``countpunctuation`` is ``True``.  Every other character
    is skipped over, so an n-gram may bridge characters that are not counted.

    :param inputtext: The text to measure.
    :param n: The n in n-gram.
    :param countspace: Pass ``True`` to treat spaces as n-gram characters.
    :param countpunctuation: Pass ``True`` to treat punctuation (except new
        lines) as n-gram characters.
    :return: ``None`` when ``inputtext`` is ``None`` or ``n`` is below 1,
        otherwise whatever ``sort_dict_by_value_reverse`` returns for the
        n-gram -> count mapping.
    """
    if inputtext is None or n < 1:
        return None

    def is_counted(char):
        # The flags are compared with ``is True``, so only the literal True
        # switches an option on.
        if char.isalpha():
            return True
        if ispunct(char) and countpunctuation is True and char != '\n':
            return True
        return char == ' ' and countspace is True

    text_length = len(inputtext)
    counts = {}
    for start in range(text_length):
        # An n-gram can only begin on a counted character.
        if not is_counted(inputtext[start]):
            continue
        gram_chars = []
        pos = start
        # Collect counted characters from ``start`` onwards until the n-gram
        # is complete or the text runs out.
        while len(gram_chars) < n and pos < text_length:
            current = inputtext[pos]
            if is_counted(current):
                gram_chars.append(current)
            pos += 1
        if len(gram_chars) == n:
            gram = "".join(gram_chars)
            counts[gram] = counts.get(gram, 0) + 1
    return sort_dict_by_value_reverse(counts)
def E_to_M(self, Entry):
    """Convert ``Entry`` to Morse code.

    Characters for which ``ispunct`` is true are left out.  Every other
    character is upper-cased, looked up in ``self.CODE`` and followed by a
    single space.  The text built this way (trailing space included) is kept
    in ``self.result``; the returned string is the same text without its last
    character.
    """
    self.result = ""
    for symbol in Entry:
        if not ispunct(symbol):
            self.result += self.CODE[symbol.upper()] + " "
    # Slice rather than strip: only the single separator appended last goes.
    return self.result[:-1]
def ShowAscii(char, code):
    """Print which ASCII class ``code`` belongs to, labelled with ``char``.

    ``code`` is classified with ``ascii.isalpha``, ``ascii.isdigit`` and
    ``ascii.ispunct``, in that order; anything else falls into the last
    category.  The line printed is ``char``, a space, then the description.

    :param char: Label shown at the start of the line.
    :param code: Value handed to the ``ascii`` classification functions.
    """
    if ascii.isalpha(code):
        description = 'is an ascii alphabeta'
    elif ascii.isdigit(code):
        description = 'is an ascii digital'
    elif ascii.ispunct(code):
        description = 'is an ascii punctuation'
    else:
        description = 'is an ascii code(not alphabeta, number or punctuation)'
    # A single pre-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('%s %s' % (char, description))
def getRandomTweetClause(self, clauses):
    """
    Pick one clause from ``clauses`` and prepare it for use in a tweet

    The clause selected by ``choice`` gets its first character capitalized.
    A '!' is appended unless the clause already ends with a character that
    ``ispunct`` accepts, or the clause is exactly '<3'.

    :param clauses: non-empty sequence of non-empty clause strings
    :return: the prepared clause
    """
    picked = choice(clauses)
    # only the first character is touched; the rest keeps its casing
    picked = picked[0].capitalize() + picked[1:]
    already_terminated = ispunct(picked[-1]) or picked == '<3'
    return picked if already_terminated else picked + '!'
def handleFile(fl):
    """Write the words found near each entity of one article to ``fl + ".word"``.

    Reads ``fl + ".ent"`` (tab-separated records) and ``fl + ".body"`` (text).
    The body is split into sentences and each sentence into words, dropping
    single punctuation characters and stop words.  For every record, and every
    sentence containing the record's search string (column 4), each remaining
    word of that sentence that is not one of the entity's name parts is
    written as one tab-separated line:
    articleId, entity, sequence, closeWord, proximity.

    The output file is only created once there is something to write.
    Processing stops at the first record that does not have exactly five
    columns.

    :param fl: Path prefix shared by the ``.ent``, ``.body`` and ``.word`` files.
    :return: None
    """
    print("Handling " + fl)
    # Plain text mode already applies universal newlines, which is what the
    # former "rU" mode asked for ("U" is rejected by Python 3.11+).
    with open(fl + ".ent", "r") as entFile:
        entities = list(csv.reader(entFile, delimiter="\t"))
    with open(fl + ".body", "r") as bodyFile:
        body = " ".join(line.strip() for line in bodyFile)

    sentences = sent_tokenize(body)
    # One word list per sentence, aligned with ``sentences``.
    sentenceWords = [
        [word for word in word_tokenize(sentence)
         if not (len(word) == 1 and ascii.ispunct(word[0]))
         and word.lower() not in stopWords]
        for sentence in sentences
    ]

    form = "{0}\t{1}\t{2}\t{3}\t{4}\n"  # articleId, entity, sequence, closeWord, proximity
    fw = None
    try:
        for record in entities:
            if len(record) != 5:
                # NOTE(review): the last value is len(fl), as in the original;
                # len(record) may have been intended - confirm.
                print("ALERT:  %s %s %s" % (fl, record, len(fl)))
                sys.stdout.flush()
                return
            articleId = record[0]
            entity = record[3]
            entityToSearch = record[4]
            seq = 0

            # Name parts of the entity; these are never reported as close
            # words.  Only membership is tested, so their order is irrelevant.
            nameParts = entityToSearch.split(", ")
            if len(nameParts) > 1:
                nameParts = [nameParts[0], nameParts[1].split(" (")[0]]
            else:
                nameParts = entityToSearch.split(" ")

            # The loop variable must not be called ``words``: rebinding the
            # per-sentence lists would corrupt every record after the first.
            for sent, wordsInSentence in zip(sentences, sentenceWords):
                if entityToSearch not in sent:
                    continue
                seq += 1
                proximity = 0
                for word in wordsInSentence:
                    if word in nameParts:
                        continue
                    if fw is None:
                        # Lazily create the output so that articles without
                        # matches leave no empty file behind.
                        fw = open(fl + ".word", "w")
                        print("Writing to " + fl + ".word")
                    proximity += 1  # Fake proximity. To avoid duplicate records
                    fw.write(form.format(articleId, entity, seq, word, proximity))
    finally:
        # Also reached on the early ALERT return, so the handle never leaks.
        if fw is not None:
            fw.close()
def is_punct(ch):
    """Return True when the single character ``ch`` counts as punctuation.

    A character qualifies when its code point is below 127 and
    ``ascii.ispunct`` accepts it, or when the code point lies in one of the
    Unicode ranges listed below.
    """
    unicode_ranges = (
        (0x2000, 0x206f),  # General Punctuation
        (0x3000, 0x303f),  # CJK Symbols and Punctuation
        (0xff00, 0xffef),  # Halfwidth and Fullwidth Forms
        (0xfe30, 0xfe4f),  # CJK Compatibility Forms
    )
    code_point = ord(ch)
    # in informal text, space is sometimes used as punctuation
    if code_point < 127 and ascii.ispunct(code_point):
        return True
    return any(low <= code_point <= high for low, high in unicode_ranges)
def handleFile(fl):
    """Write the words found near each entity of one article to ``fl + ".word"``.

    Reads ``fl + ".ent"`` (tab-separated records) and ``fl + ".body"`` (text).
    The body is split into sentences and each sentence into words, dropping
    single punctuation characters and stop words.  For every record, and every
    sentence containing the record's search string (column 4), each remaining
    word of that sentence that is not one of the entity's name parts is
    written as one tab-separated line:
    articleId, entity, sequence, closeWord, proximity.

    The output file is only created once there is something to write.
    Processing stops at the first record that does not have exactly five
    columns.

    :param fl: Path prefix shared by the ``.ent``, ``.body`` and ``.word`` files.
    :return: None
    """
    print("Handling " + fl)
    # Plain text mode already applies universal newlines, which is what the
    # former "rU" mode asked for ("U" is rejected by Python 3.11+).
    with open(fl + ".ent", "r") as entFile:
        entities = list(csv.reader(entFile, delimiter="\t"))
    with open(fl + ".body", "r") as bodyFile:
        body = " ".join(line.strip() for line in bodyFile)

    sentences = sent_tokenize(body)
    # One word list per sentence, aligned with ``sentences``.
    sentenceWords = [
        [word for word in word_tokenize(sentence)
         if not (len(word) == 1 and ascii.ispunct(word[0]))
         and word.lower() not in stopWords]
        for sentence in sentences
    ]

    form = "{0}\t{1}\t{2}\t{3}\t{4}\n"  # articleId, entity, sequence, closeWord, proximity
    fw = None
    try:
        for record in entities:
            if len(record) != 5:
                # NOTE(review): the last value is len(fl), as in the original;
                # len(record) may have been intended - confirm.
                print("ALERT:  %s %s %s" % (fl, record, len(fl)))
                sys.stdout.flush()
                return
            articleId = record[0]
            entity = record[3]
            entityToSearch = record[4]
            seq = 0

            # Name parts of the entity; these are never reported as close
            # words.  Only membership is tested, so their order is irrelevant.
            nameParts = entityToSearch.split(", ")
            if len(nameParts) > 1:
                nameParts = [nameParts[0], nameParts[1].split(" (")[0]]
            else:
                nameParts = entityToSearch.split(" ")

            # The loop variable must not be called ``words``: rebinding the
            # per-sentence lists would corrupt every record after the first.
            for sent, wordsInSentence in zip(sentences, sentenceWords):
                if entityToSearch not in sent:
                    continue
                seq += 1
                proximity = 0
                for word in wordsInSentence:
                    if word in nameParts:
                        continue
                    if fw is None:
                        # Lazily create the output so that articles without
                        # matches leave no empty file behind.
                        fw = open(fl + ".word", "w")
                        print("Writing to " + fl + ".word")
                    proximity += 1  # Fake proximity. To avoid duplicate records
                    fw.write(form.format(articleId, entity, seq, word, proximity))
    finally:
        # Also reached on the early ALERT return, so the handle never leaks.
        if fw is not None:
            fw.close()
def generateTweet(self): """ Generates a unique tweet from a given set of sentence clauses :return: void """ # check if tweet clauses are available if self.tweetClauses: slices = self.divideClausesIntoSlices() # generate tweet from clause slices # longest slice clause is always first, shortest is last, and the middle are chosen at random for # n - 2 clausesToUse longestClause = self.getRandomTweetClause(slices[-1]) slices.pop(-1) shortestClause = self.getRandomTweetClause(slices[0]) slices.pop(0) # generate middle clauses if any slices are still left middleClauses = '' if slices: # shuffle middle slices shuffle(slices) for clauseSlice in slices: currentClause = self.getRandomTweetClause(clauseSlice) middleClauses += currentClause # cap w/ period if not already punctuated if not ispunct(middleClauses[-1]): middleClauses += '.' # concatonate tweet newTweet = longestClause + ' ' + middleClauses + ' ' + shortestClause self.generatedTweet = newTweet else: raise ValueError('no tweet clauses available')
def IsPunctuation(code):
    """Tell whether ``code`` is punctuation by either of two checks.

    ``IsGBKPunctuation`` is consulted first; when its result is truthy that
    result is returned as is.  Otherwise the result of ``ascii.ispunct`` is
    returned.
    """
    gbk_result = IsGBKPunctuation(code)
    if gbk_result:
        return gbk_result
    return ascii.ispunct(code)
def IsASCIIPunctuation(code):
    """Return what ``ascii.ispunct`` reports for ``code``."""
    is_punctuation = ascii.ispunct(code)
    return is_punctuation
def cipher_core():
    """
    Run the interactive main menu of Cipher and carry out the chosen action.

    The user picks an encoding method (Caesar, substitution, Vigenère or
    Morse), the credits screen, or quits.  For an encoding method the user
    then chooses between a text file and a typed message, and between
    encrypting and decrypting.  File results are written with ``write_file``;
    message results are printed.  Everything is exchanged through ``input``
    and ``print``; nothing is returned.
    """

    def display_options_message():
        """
        Ask for the format (F = text file, M = message) and the action
        (1 = encrypt, 2 = decrypt) and return them combined, e.g. ``"F1"``.
        """
        choiceOfFormat = ''
        choiceOfAction = ''
        print(f"{'-' * 63}\n{' ' * 27}OPTIONS\n{'-' * 63}")
        print("F - Fichier texte\nM - Message\n")
        while choiceOfFormat.upper() not in ('F', 'M'):
            choiceOfFormat = input("Quel format voulez-vous traiter [F/M] ? ")
        print(f"\n1 - Chiffrer\n2 - Déchiffrer\n{'-' * 63}\n")
        while choiceOfAction not in ('1', '2'):
            choiceOfAction = input("Entrez l'index d'une de ces options: ")
        return choiceOfFormat.upper() + choiceOfAction

    def display_message_entry_information():
        """
        Ask for the file path or the message that matches ``choiceOfOption``.

        File paths are entered relative to the home directory (encryption) or
        to its ``cipherFolder`` sub-directory (decryption); the full path is
        returned.
        """
        home = str(Path.home())
        if choiceOfOption == "F1":
            return home + '/' + \
                input(f"Entrez le chemin de votre fichier:\n{home}/")
        elif choiceOfOption == "F2":
            return home + '/cipherFolder/' + input(
                f"Entrez le chemin du fichier chiffré:\n{home}/cipherFolder/")
        elif choiceOfOption == "M1":
            return input("Entrez votre message: ")
        else:
            return input("Entrez le message chiffré: ")

    def create_overview(name, number):
        """
        Return the first ``number`` characters of ``name`` with the newlines
        removed, for use as a one-line preview.
        """
        return name[:number].replace('\n', '')

    def ask_destination_folder():
        """
        Ask whether the decrypted file goes into a specific folder.

        Returns the folder entered by the user (one trailing '/' removed), or
        ``None`` when the user declines.  The answer is case-insensitive.
        """
        choiceOfFolder = ''
        while choiceOfFolder.upper() not in ('O', 'N'):
            choiceOfFolder = input(
                "\nSouhaitez-vous enregistrer le fichier déchiffré dans un dossier\nprécis [O/N] ? ")
        if choiceOfFolder.upper() != 'O':
            return None
        pathFolder = input(
            f"\nVeuillez indiquer le chemin vers ce dossier:\n{Path.home()}/").replace('//', '/')
        # endswith() rather than [-1] so an empty answer cannot raise.
        if pathFolder.endswith('/'):
            pathFolder = pathFolder[:-1]
        return pathFolder

    def save_decrypted(content, pathFolder):
        """
        Write ``content`` with ``write_file``, passing ``pathFolder`` only
        when the user chose one, and return what ``write_file`` returns.
        """
        if pathFolder is None:
            return write_file(content)
        return write_file(content, pathFolder)

    message = ''
    choiceOfCipherMethod = ''
    while choiceOfCipherMethod.upper() not in ('1', '2', '3', '4', 'C', 'Q'):
        # A non-empty value here means the previous answer was rejected.
        if choiceOfCipherMethod != '':
            os.system("clear")
            show_error(choiceOfCipherMethod)
        print("Méthodes d'encodage disponibles:")
        for index, nameMethods in enumerate(listMethods, 1):
            print(f"{index} - {nameMethods}")
        print(f"\nC - Crédits\nQ - Sortir de Cipher\n{'-' * 63}")
        choiceOfCipherMethod = input("\nEntrez l'index d'une de ces options: ")

    menuChoice = choiceOfCipherMethod.upper()
    if menuChoice != 'Q':
        os.system("clear")

    if menuChoice == '1':
        # ---- Caesar cipher ----
        choiceOfOption = display_options_message()
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 22}CHIFFRE DE CÉSAR\n{'-' * 63}")
        while not message:
            message = display_message_entry_information()
        offset = input("Entrez la valeur du décalage: ")
        # isdecimal() instead of isnumeric(): characters such as '²' are
        # "numeric" but make int() raise.
        while not offset.isdecimal() or int(offset) >= 26:
            if not offset.isdecimal():
                print("\n")
                show_error(offset, True)
                offset = input("\tVeuillez entrer le nombre de décalage: ")
            else:
                offset = input(
                    "\nVous venez de dépasser la limite de décalage. (Max. 25)\nVeuillez entrer un nombre de décalage plus petit: ")
        shift = int(offset)
        if choiceOfOption == "F1":
            # Encrypt once and reuse the text for both the file and the preview.
            encrypted = caesar_encryption(read_file(message), shift)
            nameFolder = write_file(encrypted)
            print(f"\n\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(encrypted, 63)}")
        elif choiceOfOption == "F2":
            pathFolder = ask_destination_folder()
            nameFolder = save_decrypted(
                caesar_encryption(read_file(message), shift, True), pathFolder)
            print(f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}")
        elif choiceOfOption == "M1":
            print(f"\nVotre message chiffré est:\n{''.join(caesar_encryption(message, shift))}")
        else:
            print(f"\nLe message d'origine est:\n{caesar_encryption(message, shift, True)}")
        do_you_want_to_continue()
    elif menuChoice == '2':
        # ---- Substitution cipher ----
        choiceOfOption = display_options_message()
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 17}CHIFFREMENT PAR SUBSTITUTION\n{'-' * 63}")
        key = ''
        while not message:
            message = display_message_entry_information()
        if choiceOfOption[1] == '1':
            # Encrypting: build the substitution alphabet one letter at a time.
            print("\nCONFIGURATION DE L'ALPHABET DE SUBSTITUTION\n")
            for letterAlphabet in string.ascii_uppercase:
                letterUser = input(f"\t{letterAlphabet}: ")
                # NOTE(review): the duplicate check compares the raw
                # upper-cased input while the key stores the unidecode()d
                # form, so accented letters may slip past it - confirm.
                while len(letterUser) != 1 or letterUser.upper() in key or letterUser.isnumeric(
                ) or letterUser.upper() == ' ' or ca.ispunct(letterUser):
                    print("\n")
                    if letterUser.upper() in key:
                        show_error(letterUser, False, True, True)
                    else:
                        show_error(letterUser, False, True)
                    letterUser = input(f"\t{letterAlphabet}: ")
                key += unidecode(letterUser).upper()
        else:
            # Decrypting: the key must hold 26 characters once spaces are removed.
            while len(key.replace(' ', '')) != 26 or key.isnumeric():
                key = input("Entrez votre clé de chiffrement: ")
        if choiceOfOption == "F1":
            encrypted = substitution_encryption(
                read_file(message), unidecode(key).upper())
            nameFolder = write_file(encrypted)
            print(f"\nVotre clé de chiffrement est:\n{' '.join(key)}\n\n\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(encrypted, 63)}")
        elif choiceOfOption == "F2":
            pathFolder = ask_destination_folder()
            nameFolder = save_decrypted(
                substitution_encryption(
                    read_file(message),
                    unidecode(key).replace(' ', '').upper(),
                    True),
                pathFolder)
            print(f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}")
        elif choiceOfOption == "M1":
            print(f"\nVotre clé de chiffrement est:\n{' '.join(key)}\nVotre message chiffré est:\n{substitution_encryption(message, unidecode(key).upper())}")
        else:
            print(f"\nLe message d'origine est:\n{substitution_encryption(message, unidecode(key).replace(' ', '').upper(), True)}")
        do_you_want_to_continue()
    elif menuChoice == '3':
        # ---- Vigenère cipher ----
        choiceOfOption = display_options_message()
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 21}CHIFFRE DE VIGENÈRE\n{'-' * 63}")
        while not message:
            message = display_message_entry_information()
        key = input("Entrez votre clé de chiffrement: ")
        if choiceOfOption == "F1":
            encrypted = vigenere_encryption(read_file(message), key)
            nameFolder = write_file(encrypted)
            print(f"\n\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(encrypted, 63)}")
        elif choiceOfOption == "F2":
            # The shared prompt accepts a lower-case 'o' here as well; this
            # branch used to treat it as "no".
            pathFolder = ask_destination_folder()
            nameFolder = save_decrypted(
                vigenere_encryption(read_file(message), key, True), pathFolder)
            print(f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}")
        elif choiceOfOption == "M1":
            print(f"\nVotre message chiffré est:\n{''.join(vigenere_encryption(message, key))}")
        else:
            print(f"\nLe message d'origine est:\n{vigenere_encryption(message, key, True)}")
        do_you_want_to_continue()
    elif menuChoice == '4':
        # ---- International Morse code ----
        choiceOfOption = display_options_message()
        encodableCharacters = string.ascii_uppercase + \
            string.digits + '!\"$&\'()+,-./:;=?@_ '
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 20}CODE MORSE INTERNATIONAL\n{'-' * 63}")
        while not message:
            message = display_message_entry_information()
        if choiceOfOption == "F1":
            encrypted = substitution_encryption(
                read_file(message), morseCharacters, False, encodableCharacters)
            nameFolder = write_file(encrypted)
            print(f"\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(encrypted, 63)}")
        elif choiceOfOption == "F2":
            pathFolder = ask_destination_folder()
            nameFolder = save_decrypted(
                substitution_encryption(
                    read_file(message), morseCharacters, True, encodableCharacters),
                pathFolder)
            print(f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}")
        elif choiceOfOption == "M1":
            print(f"\nVotre message chiffré est:\n{substitution_encryption(message, morseCharacters, False, encodableCharacters)}")
        else:
            print(f"\nLe message d'origine est:\n{substitution_encryption(message, morseCharacters, True, encodableCharacters)}")
        do_you_want_to_continue()
    elif menuChoice == 'C':
        print(f"{'-' * 63}\n{' ' * 26}CRÉDITS\n{'-' * 63}\nVERSION:{' ' * 51}v1.0\n\nAUTEUR:{' ' * 45}Yann LE COZ\nÉTABLISSEMENT:{' ' * 29}Bordeaux Ynov Campus")
        do_you_want_to_continue()
    elif menuChoice == 'Q':
        print("\n")
        leave_software()