Ejemplo n.º 1
0
def simpleSentiment(trainingset, devin, devout):
    """Tag each dev-set token with its most probable emission tag.

    Trains an emission table from *trainingset*, then assigns every token in
    *devin* the tag maximising its emission count, falling back to the
    '#UNK#' distribution for tokens never seen during training.  Results are
    written to a hard-coded output path.

    NOTE(review): *devout* is accepted but never used — confirm intent.
    """
    parsed = parseFile(trainingset)
    tokens = parsed[0]
    tags = parsed[1]

    print("Training started.")
    emissionTable = calculateEmission(tokens, tags, 3)
    print("Training Completed.\n")

    inputTokens = parseFileInput(devin)
    inputTags = []
    pprint(inputTokens)

    for token in inputTokens:
        # None entries presumably mark sentence boundaries — TODO confirm
        if token is None:  # fixed: was `== None`; identity check is correct
            inputTags.append("#SPACE#")
            continue
        # Unseen tokens use the '#UNK#' distribution collected at training time
        dist = emissionTable.get(token, emissionTable['#UNK#'])
        inputTags.append(max(dist, key=dist.get))

    pprint(inputTags)
    writeout(
        inputTokens, inputTags,
        '/Users/aditya/Desktop/Machine Learning/Project/MLProject/EN/EN/output'
    )
Ejemplo n.º 2
0
def test_consistency():
    """Every charm prerequisite must be a known charm name or an allowed special phrase."""
    charms = parse.parseFile('charms.txt')
    names = [charm['name'] for charm in charms]

    assert len(charms) == len(names)
    # Charm names must be unique
    assert len(set(names)) == len(names)

    # Prerequisite strings that are valid without naming a specific charm
    special = {'None', 'Any four Occult Charms', 'Ox-Body Technique (x5)'}

    def pok(prereq):
        # OK when it is a special phrase or matches an existing charm name
        return prereq in special or prereq in names

    for charm in charms:
        raw = charm['prerequisites']
        # Alternatives are joined with ' or '; otherwise the list is comma-separated
        parts = raw.split(' or ') if ' or ' in raw else raw.split(',')
        for prereq in (part.strip() for part in parts):
            assert pok(prereq)
Ejemplo n.º 3
0
def makeModel(files):
    '''Map String String -> Tree [Event]'''
    # Parse each spec file and attach its tree as a child of a shared root.
    root = m.Tree([])
    for name, path in files.items():
        parsed = parse.parseFile(path)
        root.addChild(name, makeTree(parsed))
    return root
Ejemplo n.º 4
0
def loadFile(db, filePath):
    """Parse the results file at *filePath* and store its entries in *db*.

    Paths that do not point to an existing file are silently skipped,
    matching the original no-op else branch.
    """
    if not isfile(filePath):
        return

    print("Loading: " + filePath)
    searchDetails, entries = parse.parseFile(filePath)
    db.putSearchResults(searchDetails, entries)
Ejemplo n.º 5
0
def rand():
	"""Sample roughly 10% of parsed reviews into the 'gold_sent' file.

	Each selected review (must be longer than one character) is written
	followed by the ' &&*%*&& ' delimiter and a newline.
	"""
	elements = parse.parseFile('ymailh')
	# 'with' guarantees the handle is flushed and closed (original leaked it)
	with codecs.open('gold_sent', 'w', 'utf-8') as f:
		for element in elements:
			# keep ~10% of reviews, skipping near-empty ones
			if random.random() < 0.1 and len(element.review) > 1:
				f.write(element.review + ' &&*%*&& \n')
Ejemplo n.º 6
0
def pos_tag(filename):
	"""Tokenise and POS-tag every review in *filename*.

	Prints each (word, tag) pair as "word , tag" and a '****' separator
	after each sentence.  (Python 2 syntax: print statement.)
	"""
	elements = parse.parseFile(filename)

	# one token list per review
	tok_sents = [nltk.word_tokenize(element.review) for element in elements]
	for sent in tok_sents:
		for (x,y) in nltk.pos_tag(sent):
			# Python 2 print statement: comma-separated items print with spaces
			print x,',',y

		print '****'
Ejemplo n.º 7
0
def parseFile (fname, selthis, seltype, othis, mtname, nbody):
  """Parse *fname*, accumulating generated declarations into module globals.

  Stores the configuration arguments in module-level globals (presumably
  read by createFunction — TODO confirm), resets the three string
  accumulators, then drives parse.parseFile with createFunction as the
  per-item callback.

  Returns:
      (fndeclarations, methodtable, fnbodies) — the strings accumulated
      during the parse.
  """
  global selfthis
  global selftype
  global obthis
  global methodtablename
  global methodtable
  global fnbodies
  global fndeclarations
  global no_body
  # publish the caller's configuration for the callback to read
  selfthis = selthis
  selftype = seltype
  obthis = othis
  methodtablename = mtname
  # reset accumulators so repeated calls don't concatenate results
  methodtable = ''
  fnbodies = ''
  fndeclarations = ''
  no_body = nbody
  parse.parseFile(fname, createFunction)
  return fndeclarations, methodtable, fnbodies
Ejemplo n.º 8
0
def loadGoldenSet(db, taggedPath="../zoteroExport/taggedPapers.csv", notApplicablePath="../zoteroExport/notApplicable.csv"):
    """Load the hand-tagged golden set and not-applicable set into *db*.

    Both CSV files were exported from Zotero, so their headers are converted
    to IEEE names before storage.  Missing files are silently skipped,
    matching the original no-op else branches.
    """
    # Guard clauses replace the original nested ifs; behavior is identical.
    if not isfile(taggedPath):
        return
    if not isfile(notApplicablePath):
        # Print that the na path isn't there
        return

    tagSearchDetail, taggedEntries = parse.parseFile(taggedPath)
    naSearchDetail, naEntries = parse.parseFile(notApplicablePath)

    # Zotero exports use different header names; normalise to IEEE headers
    taggedEntries = parse.zoteroToIEEE(taggedEntries)
    naEntries = parse.zoteroToIEEE(naEntries)

    db.putSearchResults(tagSearchDetail, taggedEntries)
    db.putSearchResults(naSearchDetail, naEntries)
Ejemplo n.º 9
0
def main():
    """Train transition and emission statistics on the hw4 data file."""
    parsed = parseFile(
        '/Users/aditya/Desktop/Machine Learning/Project/MLProject/EN/EN/hw4')
    tokens = parsed[0]
    tags = parsed[1]

    calculateTransmission(tokens)
    # NOTE(review): argument order here is (tags, tokens, 3), the reverse of
    # the (tokens, tags, 3) order used elsewhere in this file — confirm.
    calculateEmission(tags, tokens, 3)
Ejemplo n.º 10
0

def printText(textstr):
    """Append *textstr* plus a trailing newline to the global textbox widget."""
    textbox.insert(END, "%s\n" % textstr)


def callback(sv):
    """Redraw the textbox with the subtree matching the entry's current text."""
    textbox.delete(1.0, END)
    matched = searchTree(types, sv.get())
    printTree2(matched, printText)
    # echo the search string after the tree, as the original did
    printText(sv.get())


# GUI entry point: a Tk window with a button (row 0), a search entry (row 1)
# and a text pane (row 2) that shows the parse tree of test.cpp, re-filtered
# on every keystroke via the StringVar trace -> callback().
if __name__ == "__main__":
    # NOTE(review): `global` at module level is a no-op; textbox and types
    # are module globals regardless.
    global textbox, types
    root = Tk()
    button = Button(root)
    button.grid(row=0, column=0)
    text = Text(root)
    sv = StringVar()
    # expose the Text widget to printText()/callback() via the module global
    textbox = text
    entry = Entry(root, textvariable=sv)
    # fire callback() on every write to the StringVar; sv=sv binds the
    # variable as a default argument (avoids the late-binding closure pitfall)
    sv.trace("w", lambda name, index, mode, sv=sv: callback(sv))
    entry.grid(row=1, column=0)
    text.grid(row=2, column=0, columnspan=2)
    types = parseFile("test.cpp", printText)

    mainloop()
Ejemplo n.º 11
0
	def test_sample_unix(self):
		"""parseFile should report 200 for the sample shell-monitor log."""
		result = parseFile("/home/preethibaskar/Documents/Data/raw_data/ShellMonitorData/ShellData_1449473904003189615.log")
		self.assertEqual(200, result)
Ejemplo n.º 12
0
# naive Bayes classifier for ratings

import parse, random, math
from nltk import bigrams

# parse raw file into object list (entries presumably carry .review and
# .rating attributes — used by test() below)
entryList = parse.parseFile('ymailh')

def test():
	"""Train/evaluate a naive Bayes rating classifier on a random 90/10 split.

	NOTE(review): this definition appears truncated by the extraction —
	the bigram tables, vocab sizes and numCorrect/total are initialised
	but the visible code never finishes using them.
	"""
	# per-rating (indices 0-5) unigram and bigram count tables
	uniDictList = [{} for x in range(6)]
	biDictList = [{} for x in range(6)]
	vocabSize = [0 for x in range(6)]
	totalSize = [0 for x in range(6)]
	biVocabSize = [0 for x in range(6)]
	bitotalSize = [0 for x in range(6)]
	numList = [0 for x in range(6)]
	numCorrect = total = 0

	# randomly split set 
	# ~10% of entries become held-out test data (entry.test == 1)
	for entry in entryList:
		if random.random() > 0.10:
			entry.test = 0
		else:
			entry.test = 1

	# compute train dictionaries
	# count unigram occurrences per rating over the training portion only
	for entry in entryList:
		if entry.test == 0:
			for word in entry.review.split():
				uniDictList[entry.rating][word] = uniDictList[entry.rating].get(word,0)+1 
Ejemplo n.º 13
0
	def test_sample_R(self):
		"""parseFile should report 200 for the sample R-monitor JSON log."""
		result = parseFile("/home/preethibaskar/Documents/Data/raw_data/RMonitorData/data_1446431444.json")
		self.assertEqual(200, result)
Ejemplo n.º 14
0
# Count word frequencies per rating and print the 40 most common words for
# each rating bucket 1-5.  (Python 2 syntax: print statement.)
import parse
from collections import Counter

elements = parse.parseFile('ymailh')

# index 0 is unused; ratings are presumably 1-5 — TODO confirm
dictionaryList = [{} for x in range(6)]

for element in elements:
	for word in element.review.split():
		# lower-case for case-insensitive counting
		dictionaryList[element.rating][word.lower()] = dictionaryList[element.rating].get(word.lower(),0)+1


for i in range(1,6):
	print Counter(dictionaryList[i]).most_common(40)
Ejemplo n.º 15
0
	# Check that the next argument is a file with a proper 'type'

	factors = formatCheck(sys.argv[i])

	if not factors or not factors["TYPE"] or factors["TYPE"] not in ["singlevote", "rankedvote", "approvalvote"]:
		print("Bad file! " + sys.argv[i] + " does not exist. Halting... \n")
		i = l
		break

	print(boldify("\nReading file: ") + factors["NAME"] + "\n")
	print(boldify("Note comments: ") + factors["COMMENTS"] + "\n")

	# Get candidates and ballots from file

	a = parseFile(sys.argv[i], factors["TYPE"])

	if a: 
		[population, candidates, typ] = a
		i += 1

	else:
		print("Error")
		break

	# Then, read flags and apply until next file appears

	if factors["TYPE"] == "singlevote":
		while i < l and (sys.argv[i])[0] == "-":

			if sys.argv[i] == "-maj":
 def on_created(self, event):
     """File-creation hook: parse the newly created file immediately.

     Presumably a watchdog FileSystemEventHandler callback — confirm.
     """
     parseFile(event.src_path)
Ejemplo n.º 17
0
def more_500():
	"""Write the first 500 reviews to 'gold_sent_500', each followed by the
	' &&*%*&& ' delimiter and a newline.

	If fewer than 500 reviews are available, writes all of them (the
	original raised IndexError in that case).
	"""
	elements = parse.parseFile('ymailh')
	# 'with' guarantees the handle is flushed and closed (original leaked it)
	with codecs.open('gold_sent_500', 'w', 'utf-8') as f:
		# slice handles short inputs gracefully
		for element in elements[:500]:
			f.write(element.review + ' &&*%*&& \n')