def build_ngram_counts(inputtext=None, n=1, countspace=False, countpunctuation=False):
    """
    Count the character n-grams found in a piece of text.

    Characters that are not counted (non-letters, plus spaces/punctuation unless enabled)
    never start an n-gram and are skipped while one is being assembled, so an n-gram
    may span over them.
    :param inputtext: The text to measure.
    :param n: The n in n-gram
    :param countspace: Pass True to count spaces as valid n-gram characters.
    :param countpunctuation: Pass True to count punctuation as valid n-gram characters (never new lines).
    :return: None when inputtext is None or n < 1, otherwise the n-gram -> count mapping
             as returned by sort_dict_by_value_reverse.
    """
    if inputtext is None or n < 1:
        return None

    def is_counted(ch):
        # Letters always count; punctuation and spaces only when explicitly enabled with True.
        if ch.isalpha():
            return True
        if ispunct(ch) and countpunctuation is True and ch != '\n':
            return True
        return ch == ' ' and countspace is True

    tally = {}
    text_length = len(inputtext)
    for start in range(text_length):
        if not is_counted(inputtext[start]):
            continue

        # Collect up to n counted characters, starting at this position.
        gram = ""
        position = start
        while len(gram) < n and position < text_length:
            current = inputtext[position]
            if is_counted(current):
                gram += current
            position += 1

        # Too few counted characters were left before the end of the text: drop the fragment.
        if len(gram) == n:
            tally[gram] = tally.get(gram, 0) + 1

    return sort_dict_by_value_reverse(tally)
	def E_to_M(self, Entry):
		"""Convert Entry to morse code.

		Punctuation characters are omitted. Every other character is upper-cased and
		looked up in self.CODE; the codes are separated by single spaces. The
		intermediate string (with its trailing space) is kept in self.result.
		"""
		self.result = ""
		for symbol in Entry:
			if ispunct(symbol):
				# punctuation has no entry to look up: leave it out
				continue
			self.result += self.CODE[symbol.upper()] + " "
		# drop the separator that follows the last code
		return self.result[:-1]
Exemple #3
0
def ShowAscii(char, code):
    """Print which ASCII class `code` belongs to, labelling the line with `char`.

    `code` is tested with the `ascii` module's isalpha/isdigit/ispunct predicates, in
    that order; anything else is reported as "not alphabeta, number or punctuation".

    The messages are built with str.format and printed through a single-argument
    print(...) call, which produces the same output under Python 2 and Python 3
    (the former print statements were a syntax error on Python 3).
    """
    if ascii.isalpha(code):
        print("{0} is an ascii alphabeta".format(char))
    elif ascii.isdigit(code):
        print("{0} is an ascii digital".format(char))
    elif ascii.ispunct(code):
        print("{0} is an ascii punctuation".format(char))
    else:
        print("{0} is an ascii code(not alphabeta, number or punctuation)".format(char))
Exemple #4
0
    def getRandomTweetClause(self, clauses):
        """
        Pick one clause at random and normalise it for use in a tweet

        The first character is capitalized, and a '!' is appended unless the clause
        already ends with punctuation or is exactly '<3'.

        :param clauses: sequence of clause strings to choose from
        :return: the chosen, normalised clause
        """

        picked = choice(clauses)

        # only the first character is capitalized; the remainder is kept untouched
        head, remainder = picked[0], picked[1:]
        picked = head.capitalize() + remainder

        # already punctuated, or the default clause: nothing to append
        if ispunct(picked[-1]) or picked == '<3':
            return picked

        return picked + '!'
Exemple #5
0
def handleFile(fl):
    """Write the words that share a sentence with each entity of an article.

    Reads ``fl + ".ent"`` (tab-separated, five columns per record) and ``fl + ".body"``
    (the article text). For every entity record, each sentence containing the entity
    search string (column 4) yields one output line per remaining word in
    ``fl + ".word"``: articleId, entity, sequence, closeWord, proximity. Single
    punctuation tokens, stop words and the parts of the entity name are left out.
    The output file is only created when there is something to write.

    Processing stops at the first record that does not have exactly five columns.

    :param fl: path prefix (without extension) of the files to process.
    """
    print("Handling " + fl)
    # Default text mode already gives universal newlines on Python 3 (the old "rU" flag
    # was removed in 3.11); `with` makes sure both inputs are closed.
    with open(fl + ".ent", "r") as entFile:
        entities = [r for r in csv.reader(entFile, delimiter="\t")]
    with open(fl + ".body", "r") as bodyFile:
        body = " ".join([x.strip() for x in bodyFile.readlines()])
    sentences = sent_tokenize(body)
    # One token list per sentence. Kept under its own name: the sentence loop below used
    # to rebind `words`, which corrupted the data for every record after the first one.
    sentenceWords = [[word for word in word_tokenize(sentence)
                      if not (len(word) == 1 and ascii.ispunct(word[0]))
                      and word.lower() not in stopWords]
                     for sentence in sentences]
    form = "{0}\t{1}\t{2}\t{3}\t{4}\n"  # articleId, entity, sequence, closeWord, proximity
    fw = None
    try:
        for record in entities:
            if len(record) != 5:
                # NOTE(review): the last value is len(fl), as in the original output;
                # len(record) may have been intended - confirm before changing.
                print("ALERT:  {0} {1} {2}".format(fl, record, len(fl)))
                sys.stdout.flush()
                return

            articleId = record[0]
            entity = record[3]
            entityToSearch = record[4]
            seq = 0
            # Words that belong to the entity name itself are not reported as close words.
            filt = entityToSearch.split(", ")
            if len(filt) > 1:
                filt = [filt[0], filt[1].split(" (")[0]]
            else:
                filt = entityToSearch.split(" ")
            filt = list(reversed(filt))
            for sent, tokens in zip(sentences, sentenceWords):
                if entityToSearch not in sent:
                    continue
                seq += 1
                proximity = 0
                for word in tokens:
                    if word in filt:
                        continue
                    if fw is None:
                        # Opened lazily so that no empty .word file is created.
                        fw = open(fl + ".word", "w")
                        print("Writing to " + fl + ".word")
                    proximity += 1  # Fake proximity. To avoid duplicate records
                    fw.write(
                        form.format(articleId, entity, seq, word, proximity))
    finally:
        # Also reached on the early return above, so the output is never left open.
        if fw is not None:
            fw.flush()
            fw.close()
Exemple #6
0
def is_punct(ch):
    """Tell whether the single character `ch` is treated as punctuation.

    True for ASCII punctuation (code points below 127 accepted by ascii.ispunct) and
    for every code point inside the Unicode blocks listed below; False otherwise.
    """
    codepoint = ord(ch)
    if codepoint < 127 and ascii.ispunct(codepoint):
        return True
    # Whole blocks counted as punctuation, as inclusive (first, last) code points.
    punctuation_blocks = (
        (0x2000, 0x206f),  # General Punctuation
        (0x3000, 0x303f),  # CJK Symbols and Punctuation
        (0xff00, 0xffef),  # Halfwidth and Fullwidth Forms
        (0xfe30, 0xfe4f),  # CJK Compatibility Forms
    )
    return any(first <= codepoint <= last for first, last in punctuation_blocks)
def is_punct(ch):
    """Return True when the single character `ch` is treated as punctuation.

    ASCII punctuation (code points below 127 accepted by ascii.ispunct) qualifies, and
    so does any code point falling inside one of the Unicode blocks listed below.
    """
    value = ord(ch)
    if value < 127 and ascii.ispunct(value):
        return True
    # Inclusive (low, high) bounds of the blocks counted as punctuation.
    blocks = [
        (0x2000, 0x206f),  # General Punctuation
        (0x3000, 0x303f),  # CJK Symbols and Punctuation
        (0xff00, 0xffef),  # Halfwidth and Fullwidth Forms
        (0xfe30, 0xfe4f),  # CJK Compatibility Forms
    ]
    for low, high in blocks:
        if low <= value <= high:
            return True
    return False
def handleFile(fl):
    """Write the words that share a sentence with each entity of an article.

    Reads ``fl + ".ent"`` (tab-separated, five columns per record) and ``fl + ".body"``
    (the article text). For every entity record, each sentence containing the entity
    search string (column 4) yields one output line per remaining word in
    ``fl + ".word"``: articleId, entity, sequence, closeWord, proximity. Single
    punctuation tokens, stop words and the parts of the entity name are left out.
    The output file is only created when there is something to write.

    Processing stops at the first record that does not have exactly five columns.

    :param fl: path prefix (without extension) of the files to process.
    """
    print("Handling " + fl)
    # Default text mode already gives universal newlines on Python 3 (the old "rU" flag
    # was removed in 3.11); `with` makes sure both inputs are closed.
    with open(fl + ".ent", "r") as entFile:
        entities = [r for r in csv.reader(entFile, delimiter="\t")]
    with open(fl + ".body", "r") as bodyFile:
        body = " ".join([x.strip() for x in bodyFile.readlines()])
    sentences = sent_tokenize(body)
    # One token list per sentence. Kept under its own name: the sentence loop below used
    # to rebind `words`, which corrupted the data for every record after the first one.
    sentenceWords = [[word for word in word_tokenize(sentence)
                      if not (len(word) == 1 and ascii.ispunct(word[0]))
                      and word.lower() not in stopWords]
                     for sentence in sentences]
    form = "{0}\t{1}\t{2}\t{3}\t{4}\n"  # articleId, entity, sequence, closeWord, proximity
    fw = None
    try:
        for record in entities:
            if len(record) != 5:
                # NOTE(review): the last value is len(fl), as in the original output;
                # len(record) may have been intended - confirm before changing.
                print("ALERT:  {0} {1} {2}".format(fl, record, len(fl)))
                sys.stdout.flush()
                return

            articleId = record[0]
            entity = record[3]
            entityToSearch = record[4]
            seq = 0
            # Words that belong to the entity name itself are not reported as close words.
            filt = entityToSearch.split(", ")
            if len(filt) > 1:
                filt = [filt[0], filt[1].split(" (")[0]]
            else:
                filt = entityToSearch.split(" ")
            filt = list(reversed(filt))
            for sent, tokens in zip(sentences, sentenceWords):
                if entityToSearch not in sent:
                    continue
                seq += 1
                proximity = 0
                for word in tokens:
                    if word in filt:
                        continue
                    if fw is None:
                        # Opened lazily so that no empty .word file is created.
                        fw = open(fl + ".word", "w")
                        print("Writing to " + fl + ".word")
                    proximity += 1  # Fake proximity. To avoid duplicate records
                    fw.write(form.format(articleId, entity, seq, word, proximity))
    finally:
        # Also reached on the early return above, so the output is never left open.
        if fw is not None:
            fw.flush()
            fw.close()
Exemple #9
0
    def generateTweet(self):
        """
        Generates a unique tweet from a given set of sentence clauses

        The tweet starts with a clause from the last slice (the longest clauses), ends
        with a clause from the first slice (the shortest ones), and has one clause from
        each remaining slice, in random order, in between. All parts are separated by
        exactly one space. The result is stored in self.generatedTweet.

        :return: void
        :raises ValueError: when no tweet clauses are available
        """

        # check if tweet clauses are available
        if not self.tweetClauses:
            raise ValueError('no tweet clauses available')

        slices = self.divideClausesIntoSlices()

        # longest slice clause is always first, shortest is last, and the middle are
        # chosen at random for the n - 2 remaining slices
        longestClause = self.getRandomTweetClause(slices.pop(-1))
        shortestClause = self.getRandomTweetClause(slices.pop(0))

        # generate middle clauses if any slices are still left
        middleClauses = ''
        if slices:
            # shuffle middle slices
            shuffle(slices)
            # separate the clauses with a space: they used to be glued together
            middleClauses = ' '.join(
                self.getRandomTweetClause(clauseSlice) for clauseSlice in slices)

            # cap w/ period if not already punctuated
            if not ispunct(middleClauses[-1]):
                middleClauses += '.'

        # concatenate the tweet, skipping an empty middle part so that no double
        # space is produced when only two slices exist
        tweetParts = [longestClause, middleClauses, shortestClause]
        self.generatedTweet = ' '.join(part for part in tweetParts if part)
Exemple #10
0
def IsPunctuation(code):
    """Return a truthy value when `code` is GBK punctuation or ASCII punctuation.

    The GBK check runs first; its result is returned as-is when truthy, otherwise the
    result of ascii.ispunct(code) is returned.
    """
    gbk_result = IsGBKPunctuation(code)
    if gbk_result:
        return gbk_result
    return ascii.ispunct(code)
Exemple #11
0
def IsASCIIPunctuation(code):
    """Return the result of ascii.ispunct for `code`."""
    is_punctuation = ascii.ispunct(code)
    return is_punctuation
Exemple #12
0
def cipher_core():
    """
    This is the general function of Cipher, it is from it that users can encrypt or decrypt their various messages.

    The user first picks an entry of the main menu: one of the methods of ``listMethods``
    (1 to 4 are handled here: Caesar, substitution, Vigenère, Morse), C for the credits
    or Q to leave. For a method, the user then chooses between a text file (F) and a
    typed message (M), and between encrypting (1) and decrypting (2); these two answers
    are combined into ``choiceOfOption`` ("F1", "F2", "M1" or "M2").
    """

    def display_options_message():
        """
        Allows the user to choose to encrypt or decrypt a message he has in his possession.

        Returns the format letter followed by the action index, e.g. "F1".
        """
        choiceOfFormat = ''
        choiceOfAction = ''
        print(f"{'-' * 63}\n{' ' * 27}OPTIONS\n{'-' * 63}")
        print("F - Fichier texte\nM - Message\n")
        while choiceOfFormat.upper() != 'F' and choiceOfFormat.upper() != 'M':
            choiceOfFormat = input("Quel format voulez-vous traiter [F/M] ? ")
        print(f"\n1 - Chiffrer\n2 - Déchiffrer\n{'-' * 63}\n")
        while choiceOfAction != '1' and choiceOfAction != '2':
            choiceOfAction = input("Entrez l'index d'une de ces options: ")
        return choiceOfFormat.upper() + choiceOfAction

    def display_message_entry_information():
        """
        Allows the user to enter the path of the text file or message they want to encrypt or decrypt.

        Reads ``choiceOfOption`` from the enclosing function, so it must only be called
        once that variable has been set.
        """
        if choiceOfOption == "F1":
            return str(Path.home()) + '/' + \
                input(f"Entrez le chemin de votre fichier:\n{str(Path.home())}/")
        elif choiceOfOption == "F2":
            return str(Path.home()) + '/cipherFolder/' + input(
                f"Entrez le chemin du fichier chiffré:\n{str(Path.home())}/cipherFolder/")
        elif choiceOfOption == "M1":
            return input("Entrez votre message: ")
        else:
            return input("Entrez le message chiffré: ")

    def create_overview(name, number):
        """
        Allows you to create an overview of text file.

        Returns the first ``number`` characters of ``name`` with new lines removed.
        """
        return name[:number].replace('\n', '')

    def ask_destination_folder():
        """
        Asks whether the decrypted file must be saved in a specific folder.

        Returns the folder path typed by the user (doubled slashes collapsed, one
        trailing slash removed), or None when the user answers no. The answer is
        compared case-insensitively, so "o" and "O" both mean yes.
        """
        choiceOfFolder = ''
        while choiceOfFolder.upper() != 'O' and choiceOfFolder.upper() != 'N':
            choiceOfFolder = input(
                "\nSouhaitez-vous enregistrer le fichier déchiffré dans un dossier\nprécis [O/N] ? ")
        if choiceOfFolder.upper() != 'O':
            return None
        pathFolder = input(
            f"\nVeuillez indiquer le chemin vers ce dossier:\n{Path.home()}/").replace('//', '/')
        # endswith() rather than pathFolder[-1]: an empty answer must not raise IndexError.
        if pathFolder.endswith('/'):
            pathFolder = pathFolder[:-1]
        return pathFolder

    message = ''
    choiceOfCipherMethod = ''
    # Main menu: repeat until a known entry is typed, showing an error after a bad one.
    while choiceOfCipherMethod not in ['1', '2', '3', '4'] and choiceOfCipherMethod.upper(
    ) != 'C' and choiceOfCipherMethod.upper() != 'Q':
        if choiceOfCipherMethod != '':
            os.system("clear")
            show_error(choiceOfCipherMethod)
        print("Méthodes d'encodage disponibles:")
        for index, nameMethods in enumerate(listMethods, 1):
            print(f"{index} - {nameMethods}")
        print(f"\nC - Crédits\nQ - Sortir de Cipher\n{'-' * 63}")
        choiceOfCipherMethod = input("\nEntrez l'index d'une de ces options: ")
    if choiceOfCipherMethod.upper() != 'Q':
        os.system("clear")
    if choiceOfCipherMethod == '1':
        choiceOfOption = display_options_message()
        nameFolder = ''
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 22}CHIFFRE DE CÉSAR\n{'-' * 63}")
        while not message:
            message = display_message_entry_information()
        offset = input("Entrez la valeur du décalage: ")
        # isdecimal() rather than isnumeric(): isnumeric() also accepts characters such
        # as '²' that int() cannot convert, which crashed the loop condition.
        while not offset.isdecimal() or int(offset) >= 26:
            if not offset.isdecimal():
                print("\n")
                show_error(offset, True)
                offset = input("\tVeuillez entrer le nombre de décalage: ")
            else:
                offset = input(
                    "\nVous venez de dépasser la limite de décalage. (Max. 25)\nVeuillez entrer un nombre de décalage plus petit: ")
        if choiceOfOption == "F1":
            nameFolder = write_file(
                caesar_encryption(
                    read_file(message),
                    int(offset)))
        elif choiceOfOption == "F2":
            pathFolder = ask_destination_folder()
            decrypted = caesar_encryption(read_file(message), int(offset), True)
            nameFolder = write_file(decrypted) if pathFolder is None else write_file(
                decrypted, pathFolder)
        # One report per option: F1 (file encrypted + overview), F2 (file decrypted),
        # M1 (encrypted message), otherwise M2 (original message).
        print(f"\n\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(caesar_encryption(read_file(message), int(offset)), 63)}" if choiceOfOption == 'F1' else f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}" if choiceOfOption ==
              'F2' else f"\nVotre message chiffré est:\n{''.join(caesar_encryption(message, int(offset)))}" if choiceOfOption == 'M1' else f"\nLe message d'origine est:\n{caesar_encryption(message, int(offset), True)}")
        do_you_want_to_continue()
    elif choiceOfCipherMethod == '2':
        choiceOfOption = display_options_message()
        nameFolder = ''
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 17}CHIFFREMENT PAR SUBSTITUTION\n{'-' * 63}")
        key = ''
        while not message:
            message = display_message_entry_information()
        if choiceOfOption[1] == '1':
            # Encrypting: the substitution alphabet is entered letter by letter.
            print("\nCONFIGURATION DE L'ALPHABET DE SUBSTITUTION\n")
            for letterAlphabet in string.ascii_uppercase:
                letterUser = input(f"\t{letterAlphabet}: ")
                # Ask again until a single, not yet used, non-numeric, non-space,
                # non-punctuation character is entered.
                while len(letterUser) != 1 or letterUser.upper() in key or letterUser.isnumeric(
                ) or letterUser.upper() == ' ' or ca.ispunct(letterUser):
                    print("\n")
                    if letterUser.upper() in key:
                        show_error(letterUser, False, True, True)
                    else:
                        show_error(letterUser, False, True)
                    letterUser = input(f"\t{letterAlphabet}: ")
                key += unidecode(letterUser).upper()
        else:
            # Decrypting: the whole key is typed at once; it must hold 26 characters
            # once spaces are removed.
            while len(key.replace(' ', '')) != 26 or key.isnumeric():
                key = input("Entrez votre clé de chiffrement: ")
        if choiceOfOption == "F1":
            nameFolder = write_file(
                substitution_encryption(
                    read_file(message),
                    unidecode(key).upper()))
        elif choiceOfOption == "F2":
            pathFolder = ask_destination_folder()
            decrypted = substitution_encryption(
                read_file(message), unidecode(key).replace(' ', '').upper(), True)
            nameFolder = write_file(decrypted) if pathFolder is None else write_file(
                decrypted, pathFolder)
        print(f"\nVotre clé de chiffrement est:\n{' '.join(key)}\n\n\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(substitution_encryption(read_file(message), unidecode(key).upper()), 63)}" if choiceOfOption == 'F1' else f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}" if choiceOfOption ==
              'F2' else f"\nVotre clé de chiffrement est:\n{' '.join(key)}\nVotre message chiffré est:\n{substitution_encryption(message, unidecode(key).upper())}" if choiceOfOption == 'M1' else f"\nLe message d'origine est:\n{substitution_encryption(message, unidecode(key).replace(' ', '').upper(), True)}")
        do_you_want_to_continue()
    elif choiceOfCipherMethod == '3':
        choiceOfOption = display_options_message()
        nameFolder = ''
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 21}CHIFFRE DE VIGENÈRE\n{'-' * 63}")
        while not message:
            message = display_message_entry_information()
        key = input("Entrez votre clé de chiffrement: ")
        if choiceOfOption == "F1":
            nameFolder = write_file(
                vigenere_encryption(
                    read_file(message), key))
        elif choiceOfOption == "F2":
            # The shared helper compares the answer case-insensitively; this branch
            # used to treat a lowercase "o" as "no".
            pathFolder = ask_destination_folder()
            decrypted = vigenere_encryption(read_file(message), key, True)
            nameFolder = write_file(decrypted) if pathFolder is None else write_file(
                decrypted, pathFolder)
        print(f"\n\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(vigenere_encryption(read_file(message), key), 63)}" if choiceOfOption == 'F1' else f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}" if choiceOfOption ==
              'F2' else f"\nVotre message chiffré est:\n{''.join(vigenere_encryption(message, key))}" if choiceOfOption == 'M1' else f"\nLe message d'origine est:\n{vigenere_encryption(message, key, True)}")
        do_you_want_to_continue()
    elif choiceOfCipherMethod.upper() == '4':
        choiceOfOption = display_options_message()
        nameFolder = ''
        # Characters handed to substitution_encryption together with morseCharacters.
        encodableCharacters = string.ascii_uppercase + \
            string.digits + '!\"$&\'()+,-./:;=?@_ '
        os.system("clear")
        print(f"{'-' * 63}\n{' ' * 20}CODE MORSE INTERNATIONAL\n{'-' * 63}")
        while not message:
            message = display_message_entry_information()
        if choiceOfOption == "F1":
            nameFolder = write_file(
                substitution_encryption(
                    read_file(message),
                    morseCharacters, False, encodableCharacters))
        elif choiceOfOption == "F2":
            pathFolder = ask_destination_folder()
            decrypted = substitution_encryption(
                read_file(message), morseCharacters, True, encodableCharacters)
            nameFolder = write_file(decrypted) if pathFolder is None else write_file(
                decrypted, pathFolder)
        print(f"\nCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}\n\nAperçu:\n{create_overview(substitution_encryption(read_file(message), morseCharacters, False, encodableCharacters), 63)}" if choiceOfOption == 'F1' else f"\n\nDÉCHIFFREMENT DU CONTENU DU FICHIER TERMINÉ\n{nameFolder}" if choiceOfOption ==
              'F2' else f"\nVotre message chiffré est:\n{substitution_encryption(message, morseCharacters, False, encodableCharacters)}" if choiceOfOption == 'M1' else f"\nLe message d'origine est:\n{substitution_encryption(message, morseCharacters, True, encodableCharacters)}")
        do_you_want_to_continue()
    elif choiceOfCipherMethod.upper() == 'C':
        print(f"{'-' * 63}\n{' ' * 26}CRÉDITS\n{'-' * 63}\nVERSION:{' ' * 51}v1.0\n\nAUTEUR:{' ' * 45}Yann LE COZ\nÉTABLISSEMENT:{' ' * 29}Bordeaux Ynov Campus")
        do_you_want_to_continue()
    elif choiceOfCipherMethod.upper() == 'Q':
        print("\n")
        leave_software()