def simpleSentiment(trainingset, devin, devout,
                    outpath='/Users/aditya/Desktop/Machine Learning/Project/MLProject/EN/EN/output'):
    """Tag each token of *devin* with its most probable sentiment label.

    Trains an emission table on *trainingset*, then labels every input
    token with the argmax tag of its emission distribution, falling back
    to the '#UNK#' distribution for unseen tokens, and writes the result
    via writeout().

    Args:
        trainingset: path to the labelled training file.
        devin: path to the unlabelled input file.
        devout: path to the gold output file (unused in the visible code).
        outpath: destination for predictions; defaults to the previously
            hard-coded location so existing callers are unaffected.
    """
    temp = parseFile(trainingset)
    tokens = temp[0]
    tags = temp[1]
    print("Training started.")
    emissionTable = calculateEmission(tokens, tags, 3)
    print("Training Completed.\n")
    inputTokens = parseFileInput(devin)
    inputTags = []
    pprint(inputTokens)
    for token in inputTokens:
        # None marks a sentence break in the parsed input — keep the
        # '#SPACE#' placeholder so token/tag lists stay aligned.
        if token is None:  # was `== None`; identity test is correct for None
            inputTags.append("#SPACE#")
            continue
        # Unseen tokens fall back to the '#UNK#' distribution.
        dist = emissionTable.get(token, emissionTable['#UNK#'])
        inputTags.append(max(dist, key=dist.get))
    pprint(inputTags)
    writeout(inputTokens, inputTags, outpath)
def test_consistency():
    """Every charm's prerequisite must be a known charm name or a whitelisted phrase."""
    charms = parse.parseFile('charms.txt')
    names = [charm['name'] for charm in charms]
    # Sanity: one name per charm, and no duplicates.
    assert len(charms) == len(names)
    assert len(set(names)) == len(names)

    special = ('None', 'Any four Occult Charms', 'Ox-Body Technique (x5)')

    def pok(prereq):
        # OK when it's one of the special phrases, otherwise it must be
        # the exact name of another charm.
        return prereq in special or prereq in names

    for charm in charms:
        raw = charm['prerequisites']
        # Prerequisites are either an ' or '-separated alternative list
        # or a comma-separated conjunction.
        sep = ' or ' if ' or ' in raw else ','
        for piece in raw.split(sep):
            assert pok(piece.strip())
def makeModel(files):
    '''Map String String -> Tree [Event]'''
    # Build one child subtree per spec from its parsed file.
    root = m.Tree([])
    for spec, path in files.items():
        root.addChild(spec, makeTree(parse.parseFile(path)))
    return root
def loadFile(db, filePath):
    """Parse *filePath* and store its search results in *db*.

    A nonexistent path is deliberately a silent no-op, matching the
    original behaviour.
    """
    if not isfile(filePath):
        return
    print("Loading: " + filePath)
    searchDetails, entries = parse.parseFile(filePath)
    db.putSearchResults(searchDetails, entries)
def rand():
    """Sample roughly 10% of non-trivial 'ymailh' reviews into 'gold_sent'.

    Each selected review is written on its own line followed by the
    ' &&*%*&& ' sentinel used downstream as a record separator.
    """
    elements = parse.parseFile('ymailh')
    # `with` guarantees the handle is flushed and closed even on error;
    # the original opened the file and never closed it.
    with codecs.open('gold_sent', 'w', 'utf-8') as f:
        for element in elements:
            # Keep ~1 in 10 reviews, skipping empty/one-character ones.
            if random.random() < 0.1 and len(element.review) > 1:
                f.write(element.review + ' &&*%*&& \n')
def pos_tag(filename): elements = parse.parseFile(filename) tok_sents = [nltk.word_tokenize(element.review) for element in elements] for sent in tok_sents: for (x,y) in nltk.pos_tag(sent): print x,',',y print '****'
def parseFile (fname, selthis, seltype, othis, mtname, nbody):
    """Parse *fname*, accumulating generated code via module globals.

    Copies the caller-supplied naming parameters into module-level
    globals (read by the createFunction callback), resets the three
    output accumulators, then drives parse.parseFile with
    createFunction as the per-item callback.

    Returns:
        (fndeclarations, methodtable, fnbodies) as accumulated by the
        callback during parsing.
    """
    global selfthis
    global selftype
    global obthis
    global methodtablename
    global methodtable
    global fnbodies
    global fndeclarations
    global no_body
    # Parameters go through globals because createFunction is invoked
    # by parse.parseFile with no way to pass extra state directly.
    selfthis = selthis
    selftype = seltype
    obthis = othis
    methodtablename = mtname
    # Reset accumulators so repeated calls don't concatenate output.
    methodtable = ''
    fnbodies = ''
    fndeclarations = ''
    no_body = nbody
    parse.parseFile(fname, createFunction)
    return fndeclarations, methodtable, fnbodies
def loadGoldenSet(db,
                  taggedPath="../zoteroExport/taggedPapers.csv",
                  notApplicablePath="../zoteroExport/notApplicable.csv"):
    """Load the hand-tagged 'golden' Zotero CSV exports into *db*.

    Does nothing when either file is missing (same silent behaviour as
    the original nested if/else-pass version).
    """
    # Guard clauses: bail out early if either file is absent.
    if not isfile(taggedPath):
        return
    if not isfile(notApplicablePath):
        return  # TODO: consider reporting the missing not-applicable file

    tagSearchDetail, taggedEntries = parse.parseFile(taggedPath)
    naSearchDetail, naEntries = parse.parseFile(notApplicablePath)

    # Zotero exports use different header names than IEEE; normalise
    # before insertion.
    taggedEntries = parse.zoteroToIEEE(taggedEntries)
    naEntries = parse.zoteroToIEEE(naEntries)

    db.putSearchResults(tagSearchDetail, taggedEntries)
    db.putSearchResults(naSearchDetail, naEntries)
def main():
    """Ad-hoc driver: parse the hard-coded 'hw4' file and build the
    transition and emission tables."""
    # simpleSentiment('/Users/aditya/Desktop/Machine Learning/Project/MLProject/EN/EN/train','/Users/aditya/Desktop/Machine Learning/Project/MLProject/EN/EN/dev.in','/Users/aditya/Desktop/Machine Learning/Project/MLProject/EN/EN/dev.out')
    temp = parseFile(
        '/Users/aditya/Desktop/Machine Learning/Project/MLProject/EN/EN/hw4')
    tokens = temp[0]
    tags = temp[1]
    # pprint(tokens)
    # print("\n")
    # pprint(tags)
    calculateTransmission(tokens)
    # NOTE(review): argument order here is (tags, tokens) while
    # simpleSentiment calls calculateEmission(tokens, tags, 3) — one of
    # the two call sites is likely swapped; confirm against the
    # calculateEmission signature.
    calculateEmission(tags, tokens, 3)
def printText(textstr):
    # Append one line to the shared Text widget.
    textbox.insert(END, textstr + "\n")


def callback(sv):
    # Re-run the type search and redraw the Text widget on every change
    # to the entry's StringVar.
    textbox.delete(1.0, END)
    t = searchTree(types, sv.get())
    printTree2(t, printText)
    #print(t)
    #printTree2(types, printText)
    printText(sv.get())


if __name__ == "__main__":
    # NOTE(review): `global` at module level is a no-op; kept byte-identical.
    global textbox, types
    root = Tk()
    button = Button(root)
    button.grid(row=0, column=0)
    text = Text(root)
    #global sv
    sv = StringVar()
    # printText/callback read `textbox` as a module global.
    textbox = text
    entry = Entry(root, textvariable=sv)
    # 'w' = write trace: callback fires on every keystroke in the entry.
    sv.trace("w", lambda name, index, mode, sv=sv: callback(sv))
    entry.grid(row=1, column=0)
    text.grid(row=2, column=0, columnspan=2)
    # Parse the C++ source once at startup to build the searchable type tree.
    types = parseFile("test.cpp", printText)
    mainloop()
def test_sample_unix(self):
    """A known-good ShellMonitor log must parse with status code 200."""
    log_path = "/home/preethibaskar/Documents/Data/raw_data/ShellMonitorData/ShellData_1449473904003189615.log"
    result = parseFile(log_path)
    self.assertEqual(200, result)
# naive Baise classifier for ratings import parse, random, math from nltk import bigrams # parse raw file into object list entryList = parse.parseFile('ymailh') def test(): uniDictList = [{} for x in range(6)] biDictList = [{} for x in range(6)] vocabSize = [0 for x in range(6)] totalSize = [0 for x in range(6)] biVocabSize = [0 for x in range(6)] bitotalSize = [0 for x in range(6)] numList = [0 for x in range(6)] numCorrect = total = 0 # randomly split set for entry in entryList: if random.random() > 0.10: entry.test = 0 else: entry.test = 1 # compute train dictionaries for entry in entryList: if entry.test == 0: for word in entry.review.split(): uniDictList[entry.rating][word] = uniDictList[entry.rating].get(word,0)+1
def test_sample_R(self):
    """A known-good RMonitor JSON sample must parse with status code 200."""
    sample_path = "/home/preethibaskar/Documents/Data/raw_data/RMonitorData/data_1446431444.json"
    result = parseFile(sample_path)
    self.assertEqual(200, result)
import parse from collections import Counter elements = parse.parseFile('ymailh') dictionaryList = [{} for x in range(6)] for element in elements: for word in element.review.split(): dictionaryList[element.rating][word.lower()] = dictionaryList[element.rating].get(word.lower(),0)+1 for i in range(1,6): print Counter(dictionaryList[i]).most_common(40)
# Check that the next argument is a file with a proper 'type' factors = formatCheck(sys.argv[i]) if not factors or not factors["TYPE"] or factors["TYPE"] not in ["singlevote", "rankedvote", "approvalvote"]: print("Bad file! " + sys.argv[i] + " does not exist. Halting... \n") i = l break print(boldify("\nReading file: ") + factors["NAME"] + "\n") print(boldify("Note comments: ") + factors["COMMENTS"] + "\n") # Get candidates and ballots from file a = parseFile(sys.argv[i], factors["TYPE"]) if a: [population, candidates, typ] = a i += 1 else: print("Error") break # Then, read flags and apply until next file appears if factors["TYPE"] == "singlevote": while i < l and (sys.argv[i])[0] == "-": if sys.argv[i] == "-maj":
def on_created(self, event):
    # Filesystem-watcher hook: parse any newly created file right away.
    parseFile(event.src_path)
def more_500():
    """Dump the first 500 'ymailh' reviews to 'gold_sent_500'.

    Each review is followed by the ' &&*%*&& ' sentinel used downstream
    as a record separator.

    Fixes vs. the original: the file handle was never closed, and
    indexing range(500) raised IndexError when fewer than 500 reviews
    were parsed — slicing tolerates short input.
    """
    elements = parse.parseFile('ymailh')
    with codecs.open('gold_sent_500', 'w', 'utf-8') as f:
        # elements[:500] yields at most 500 items without over-indexing.
        for element in elements[:500]:
            f.write(element.review + ' &&*%*&& \n')