Beispiel #1
0
def generateOffSet(inputStr, corpusURL):
    """Interleave every item of the cleaned input with random filler items.

    The input is first passed through ``corpus.clearHTML``. A frequency
    table is obtained from ``freq_analysis(corpus.callMe(corpusURL))`` and
    handed to ``garbageFill.generateGarbage`` for every filler item.

    Args:
        inputStr: Raw input; iterated item by item after cleaning.
        corpusURL: Passed to ``corpus.callMe`` to obtain the data that the
            frequency analysis runs on.

    Returns:
        A tuple ``(NewList, offset)``. ``NewList`` holds each input item
        followed by its filler items; ``offset[i]`` is the number of filler
        items inserted after the i-th input item (between 2 and 69).
    """
    inputStr = corpus.clearHTML(inputStr)
    freq = freq_analysis(corpus.callMe(corpusURL))

    offset = []
    NewList = []
    for item in inputStr:
        NewList.append(item)
        # Only the filler count matters, so draw it directly instead of
        # sampling that many throw-away integers and taking their length.
        fillerCount = random.randrange(2, 70)
        offset.append(fillerCount)
        for _ in range(fillerCount):
            NewList.append(garbageFill.generateGarbage(freq))
    return NewList, offset
Beispiel #2
0
def generateOffSet(inputStr, corpusURL):
    """Pad each item of the cleaned input with a random run of filler items.

    ``inputStr`` is cleaned with ``corpus.clearHTML`` before it is walked.
    The filler items come from ``garbageFill.generateGarbage``, which is
    given the result of ``freq_analysis(corpus.callMe(corpusURL))``.

    Args:
        inputStr: Raw input; iterated item by item after cleaning.
        corpusURL: Forwarded to ``corpus.callMe``; its result feeds the
            frequency analysis.

    Returns:
        ``(NewList, offset)`` where ``NewList`` is the padded sequence and
        ``offset`` lists, per input item, how many filler items follow it
        (each value is between 2 and 69).
    """
    inputStr = corpus.clearHTML(inputStr)
    freq = freq_analysis(corpus.callMe(corpusURL))

    NewList = []
    offset = []
    for item in inputStr:
        NewList.append(item)
        # The run length is the only random quantity needed here; there is
        # no reason to materialise a sample just to measure its length.
        runLength = random.randrange(2, 70)
        offset.append(runLength)
        for _ in range(runLength):
            NewList.append(garbageFill.generateGarbage(freq))
    return NewList, offset
Beispiel #3
0
def word_list(file_of_words):
    """Read a whole file and return it after ``corpus.clearHTML`` cleaning.

    Args:
        file_of_words: Path of the file to read in text mode.

    Returns:
        Whatever ``corpus.clearHTML`` returns for the file's full content.
    """
    # The context manager closes the handle even if read() raises. The
    # former buffering=0 argument is dropped: it is rejected for text-mode
    # files on Python 3 and has no effect on a single full read.
    with open(file_of_words, 'r') as fileObject:
        corpus_words = fileObject.read()
    return corpus.clearHTML(corpus_words)
Beispiel #4
0
def word_list(file_of_words):
    """Return the content of a file, cleaned by ``corpus.clearHTML``.

    Args:
        file_of_words: Path of the file to read in text mode.

    Returns:
        The result of ``corpus.clearHTML`` applied to the file's full text.
    """
    # read() already yields the complete string, so no per-character
    # concatenation is needed. The handle is closed by the with-block, and
    # the unbuffered flag (invalid for text mode on Python 3) is omitted.
    with open(file_of_words, 'r') as fileObject:
        return corpus.clearHTML(fileObject.read())