#!/usr/local/bin/python
import cgi, cgitb, json, os
import opinionsToGraph as OTG

cgitb.enable()  # show tracebacks in the browser while debugging

# CGI header, then the search form.
print "Content-Type: text/html"
print
print """
<form>
<center><input type="text" name="searchTerm"><input type="submit" value="Go"></center>
</form>
"""

form = cgi.FieldStorage()
try:
    searchTerm = form["searchTerm"].value

    # Build the graph for the requested opinion file and write it where the
    # graph page expects to find it.
    nodes = OTG.getNodes('/Users/andrewgambardella/Sites/data/' + searchTerm + '.txt')
    (allNodes, links) = OTG.getAllNodesAndLinks(nodes)

    os.system('rm /Users/andrewgambardella/Sites/opinions.json')
    with open('/Users/andrewgambardella/Sites/opinions.json', 'w') as out:
        out.write(json.dumps({"nodes": allNodes, "links": links}))

    # Embed the graph view and the detail view side by side.
    print """
<iframe id="graph" width="70%" height="90%" src="http://localhost/~andrewgambardella/graph.html"></iframe>
<iframe id="info" width="29%" height="90%" src="http://localhost/~andrewgambardella/info.html"></iframe>
"""
except:
    # No searchTerm submitted yet (or processing failed): just show the form.
    pass
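# --- Illustration only (not part of the script above) ---
# A minimal sketch of the opinions.json payload the script writes, assuming
# graph.html renders a standard D3 force layout from a "nodes" list plus a
# "links" list whose source/target are node indices. Only the top-level
# "nodes"/"links" keys come from the script; the per-node and per-link fields
# shown here are hypothetical.
example_payload = {
    "nodes": [{"name": "Opinion A", "group": 0},
              {"name": "Opinion B", "group": 1}],
    "links": [{"source": 0, "target": 1, "value": 1}],
}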
    p[0] = ('List of premises', p[1])
    if len(p) == 3:
        p[0] = ('List of premises', p[1], p[2])

def p_dHelper(p):
    '''dHelper : VC S dHelper
               | VC S
               | PERIOD dHelper
               | PERIOD'''
    if len(p) == 2:
        p[0] = ('Conclusive verb', p[1])
    if len(p) == 3:
        p[0] = ('Conclusive verb', p[1], p[2])
    if len(p) == 4:
        p[0] = ('Conclusive verb', p[1], p[2], p[3])

# Error rule for syntax errors
def p_error(p):
    print "Syntax error in input!"

if __name__ == "__main__":
    import os
    import ply.yacc as yacc
    import opinionsToGraph as otg

    yacc.yacc()  # build the parser from the p_* rules in this module

    # Collect every opinion node from the data directory.
    nodes = []
    for aFile in os.listdir('../data/'):
        nodes += otg.getNodes('../data/' + aFile)

    # Repeatedly parse the first node's text (only nodes[0] is ever parsed).
    while 1:
        yacc.parse(nodes[0].get('text'))
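# --- Illustration only (not part of the parser above) ---
# The p_* actions build plain nested tuples rather than AST node objects. For a
# dHelper derivation of "VC S dHelper" whose trailing dHelper is a bare PERIOD,
# the result is shaped like the sketch below (the token values 'holds' and the
# sentence text are invented for illustration):
example_dHelper = ('Conclusive verb', 'holds', 'that the claim fails',
                   ('Conclusive verb', '.'))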
    ret = []
    with open(filename) as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        ret.append(line)
    return ret

if __name__ == "__main__":
    import argparse, json, os
    import nltk
    from nltk.collocations import BigramCollocationFinder
    from nltk.tokenize import punkt
    import opinionsToGraph as otg

    parser = argparse.ArgumentParser(description='Get links from a file.')
    parser.add_argument('f', metavar='filename', action='store',
                        help='Directory of files to parse (LexisNexis format)')
    args = parser.parse_args()

    # Read every file in the directory, coloring each file's nodes by its index.
    nodes = []
    for aFile, color in zip(os.listdir(args.f), range(0, len(os.listdir(args.f)))):
        nodes += otg.getNodes(args.f + aFile, color)

    # Concatenate the text of every node into one string.
    all_text = []
    for node in nodes:
        all_text.append(node.get('text') + ' ')
    all_text = ''.join(all_text)

    # Sentence fragmenter trained on real_estate (arbitrarily); train() expects
    # the training text itself, not a filename.
    trainer = punkt.PunktSentenceTokenizer()
    trainer.train(open("data/real_estate.txt").read())
    sentTokens = trainer.tokenize(all_text)

    wordTokens = [nltk.wordpunct_tokenize(s) for s in sentTokens]
    # Put all words into one flat, lowercased list.
    flatWords = [item.lower() for sublist in wordTokens for item in sublist if item.isalpha()]

    # Score bigram collocations by likelihood ratio, skipping stopwords.
    finder = BigramCollocationFinder.from_words(flatWords)
    finder.apply_word_filter(lambda w: w in getBlacklist('data/english.stop.txt'))
    bigram_measures = nltk.collocations.BigramAssocMeasures()
    scored = finder.score_ngrams(bigram_measures.likelihood_ratio)
    print scored
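# --- Illustration only (not part of the script above) ---
# Usage (the script name is assumed; note the trailing slash, since paths are
# built as args.f + aFile):
#     python collocations.py ../data/
# score_ngrams returns ((word1, word2), score) pairs sorted by descending
# score, so `scored` looks roughly like this (words and numbers invented):
example_scored = [(('summary', 'judgment'), 512.7),
                  (('due', 'process'), 430.2)]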