Beispiel #1
0
Datei: tp5.py Projekt: joelcn/uni
def task1():
    """Task 1: write the sorted four-letter words found in the stemmed text.

    Stems ``t[0]`` via ``fileMK.stemText`` and collects every run of exactly
    four ASCII letters that is delimited on both sides by a space, a
    parenthesis or an angle bracket. The matches are passed to
    ``fileMK.unsortedDictFromWords`` and the sorted keys of the result are
    written, as the ``str`` of a list, to ``outputPath + "task1"``.

    Nothing is written when the text contains no matching word.
    """
    stemmed = fileMK.stemText(t[0])
    # Lookarounds test the delimiters without consuming them. The previous
    # pattern consumed the trailing delimiter, so in " word that " only
    # "word" was found because the shared space was already used up.
    # The raw string also avoids the invalid "\(" / "\)" escape sequences.
    four_letter_words = re.findall(
        r"(?<=[ ()<>])([a-zA-Z]{4})(?=[ ()<>])", stemmed
    )
    if four_letter_words:
        word_counts = fileMK.unsortedDictFromWords(four_letter_words)
        target = outputPath + "task1"
        fileMK.writeTextToFile(str(sorted(word_counts.keys())), target)
Beispiel #2
0
Datei: tp3.py Projekt: joelcn/uni
def task2():
    """Task 2: report how many entries the term dictionary of the corpus has.

    Every element of ``t`` is split on single spaces, the resulting tokens
    are handed to ``fileMK.unsortedDictFromWords``, and the size of the
    returned mapping is printed and written to ``outputPath + "task2.txt"``.
    """
    tokens = [token for line in t for token in line.split(" ")]
    term_counts = fileMK.unsortedDictFromWords(tokens)
    summary = "There are {} different Terms".format(len(term_counts))
    print(summary)
    fileMK.writeTextToFile(summary, outputPath + "task2.txt")
Beispiel #3
0
Datei: tp3.py Projekt: joelcn/uni
def task1():
    """Task 1: write the first (up to) 50 entries of the sorted term ranking.

    Every element of ``t`` is split on single spaces and the tokens are
    passed to ``fileMK.sortedDictFromWords``. The first 50 entries of the
    result (fewer if the ranking is shorter) are written one per line.

    The user is prompted for a target file; an empty answer falls back to
    ``outputPath + "task1.txt"``.
    """
    words = []
    for line in t:
        words.extend(line.split(" "))
    ranked = fileMK.sortedDictFromWords(words)

    # Bound the loop by the actual number of entries: a corpus with fewer
    # than 50 entries used to raise IndexError here.
    limit = min(50, len(ranked))
    s = "".join(str(ranked[i]) + "\n" for i in range(limit))

    # Strip the tuple punctuation from the str() form of each entry.
    # NOTE(review): this also strips parentheses, apostrophes and commas that
    # belong to a term itself (e.g. "don't") -- confirm that is acceptable.
    s = s.replace("(", "").replace(")", "").replace("'", "").replace(",", ":")

    path = input(" Enter Path & Filename (like \"D:\\terms.txt\")\n")
    if path == "":
        path = outputPath + "task1.txt"
    fileMK.writeTextToFile(s, path)
Beispiel #4
0
Datei: tp3.py Projekt: joelcn/uni
def task4():
    """Task 4: run four regex searches over the corpus and save the matches.

    The elements of ``t`` are joined into one text, each followed by a
    newline. For every sub-task a headline is printed, the pattern is
    applied with ``re.findall``, the number of matches is printed and the
    ``str`` of the match list is written to ``outputPath + <file name>``:

    a) bracketed runs of non-space characters  -> ``task4a.txt``
    b) two or more consecutive uppercase words -> ``task4b.txt``
    c) numbers introduced by ``#``             -> ``task4c.txt``
    d) hyphenated terms                        -> ``task4d.txt``
    """
    words = "".join(line + "\n" for line in t)
    lowered = words.lower()

    # (headline, pattern, text to search, count suffix, output file name)
    searches = (
        # NOTE(review): the headline says "more than 3" but the pattern
        # accepts 3 or more characters -- confirm which one is intended.
        (" More than 3 Non-Space Characters in Brackets.",
         r"[(]\S{3,}[)]", lowered, " Brackets found", "task4a.txt"),
        # Uppercase words must be searched in the original text: after
        # lower() no [A-Z] character is left, so this search could never
        # match. The old tail "[\s[A-Z]{2,}]*" was also a character class
        # rather than a repeated group; "(?:\s[A-Z]{2,})+" is the intended
        # "one or more further uppercase words".
        (" More than 1 consecutive Words in Uppercase.",
         r"[A-Z]{2,}(?:\s[A-Z]{2,})+", words, " Words found", "task4b.txt"),
        (" Numbers starting with \"#\".",
         r"#[0-9,]+", lowered, " Numbers found", "task4c.txt"),
        (" Terms containing a Hyphen.",
         r"[a-zA-Z]{3,}[-][a-zA-Z]{3,}", lowered, " Terms found", "task4d.txt"),
    )

    for headline, pattern, text, count_suffix, file_name in searches:
        print(headline)
        matches = re.findall(pattern, text)
        print(str(len(matches)) + count_suffix)
        fileMK.writeTextToFile(str(matches), outputPath + file_name)
Beispiel #5
0
Datei: tp4.py Projekt: joelcn/uni
def task4():
    """Task 4: save what ``fileMK.get2FollowingTerms`` finds for two keywords.

    For each of the keywords "can" and "general" the helper is called with
    the corpus ``t``; every returned string is prefixed with the keyword and
    a space, terminated by a newline, and the concatenation is written to
    ``outputPath + "task4-<keyword>.txt"``.
    """
    for keyword in ("can", "general"):
        followers = fileMK.get2FollowingTerms(t, keyword)
        lines = [keyword + " " + follower + "\n" for follower in followers]
        target = outputPath + "task4-" + keyword + ".txt"
        fileMK.writeTextToFile("".join(lines), target)
Beispiel #6
0
Datei: tp4.py Projekt: joelcn/uni
def task5():
    """Task 5: write the first (up to) 10 ranked entries per keyword file.

    For each of the keywords "can" and "general" the file
    ``outputPath + "task4-<keyword>.txt"`` is read with
    ``fileMK.readFileToArray`` and ranked with ``fileMK.sortedDictFromWords``.
    The first 10 entries (fewer if the ranking is shorter) are written, one
    per line and with the text ``"<keyword> "`` removed, to
    ``outputPath + "task5-<keyword>.txt"``.
    """
    for keyword in ("can", "general"):
        entries = fileMK.readFileToArray(outputPath + "task4-" + keyword + ".txt")
        ranked = fileMK.sortedDictFromWords(entries)

        # Bound by the real length: fewer than 10 entries used to raise
        # IndexError.
        limit = min(10, len(ranked))

        # NOTE(review): replace() drops every occurrence of "<keyword> ",
        # including ones inside the following terms (e.g. "american ") --
        # confirm whether only the leading keyword should be removed.
        s = "".join(
            str(ranked[i]).replace(keyword + " ", "") + "\n"
            for i in range(limit)
        )
        fileMK.writeTextToFile(s, outputPath + "task5-" + keyword + ".txt")
Beispiel #7
0
Datei: tp3.py Projekt: joelcn/uni
def task3():
    """Task 3: list the terms whose count equals a user-supplied number.

    The lower-cased elements of ``t`` are split on single spaces and passed
    to ``fileMK.unsortedDictFromWords``. The user is asked for a count; a
    non-integer answer falls back to 51. All keys whose value equals that
    count are joined with "; " and written to
    ``outputPath + "task3-<count>.txt"``; the number of hits is printed.
    """
    tokens = [token for line in t for token in line.lower().split(" ")]
    term_counts = fileMK.unsortedDictFromWords(tokens)

    try:
        wanted = int(input(" How many times should the term appear?\n"))
    except ValueError:
        print(" Invalid Value, assuming 51\n")
        wanted = 51

    hits = [term for term, count in term_counts.items() if count == wanted]
    s = "".join(str(term) + "; " for term in hits)

    # Number of "; " separators in the output text.
    n = str(s.count("; "))
    print(" " + n + " Words found occuring " + str(wanted) + " time(s)")
    fileMK.writeTextToFile(s, outputPath + "task3-" + str(wanted) + ".txt")
Beispiel #8
0
Datei: tp3.py Projekt: joelcn/uni
def task6():
    """Task 6: compare the first (up to) 15 ranked terms of two authors.

    Reads "Federalist Hamilton.txt" and "Federalist Madison.txt" from
    ``corpusPath`` with ``fileMK.readFile``, lower-cases each text, splits it
    on single spaces and ranks the tokens with
    ``fileMK.sortedDictFromWords``. For each author the first 15 entries
    (fewer if the ranking is shorter) are listed one per line under a
    "<Author>:" heading. Both sections are printed and written, separated
    by a newline, to ``outputPath + "task6.txt"``.
    """
    sections = []
    for author in ("Hamilton", "Madison"):
        text = fileMK.readFile(corpusPath + "Federalist " + author + ".txt")
        # NOTE(review): split(" ") does not split on newlines; if readFile
        # returns the raw text, words adjacent to line breaks stay glued
        # together -- confirm against readFile.
        ranked = fileMK.sortedDictFromWords(text.lower().split(" "))

        # Bound each author separately: a ranking with fewer than 15
        # entries used to raise IndexError in the shared loop.
        limit = min(15, len(ranked))
        listing = "".join(str(ranked[i]) + "\n" for i in range(limit))

        # Strip the tuple punctuation from the str() form of each entry.
        listing = listing.replace("(", "").replace(")", "").replace("'", "")
        sections.append(author + ":\n" + listing)

    h, m = sections
    print(h)
    print(m)
    fileMK.writeTextToFile(h + "\n" + m, outputPath + "task6.txt")