def main():
    """Build the chemicals noun graph from every article and save it as GML.

    Changes the working directory to the chemicals corpus folder, runs each
    ``*.txt`` file found there through ``blib.filterWords``, and hands the
    entries stored under its "NN" key to ``blib.handleDocumentNouns``
    together with one shared ``nx.Graph``.  When every article has been
    processed the graph is written to ``chemicals.graph`` (relative to the
    corpus folder, since that is now the working directory).

    Progress and per-article wall-clock time are reported on stdout.
    """
    baseDir = "/Users/francis/Documents/cpe480_texts/basicmaterials/chemicals"
    os.chdir(baseDir)
    articles = glob.glob('*.txt')
    graph = nx.Graph()

    # Each message is formatted into ONE string before printing, so the call
    # emits the same text under both the Python 2 print statement and the
    # Python 3 print function.
    print("Let's get to work!")
    total = len(articles)
    for position, article in enumerate(articles, 1):
        start = time.time()
        print("%s - ( %s of %s )" % (article, position, total))

        significant = blib.filterWords(article)
        blib.handleDocumentNouns(significant["NN"], graph)

        print("%s done." % (article,))
        print("Time Elapsed: %s seconds" % (time.time() - start,))
        # Two blank lines separate one article's report from the next.
        print("")
        print("")

    # Export graph data to a file
    edgeListFile = "chemicals.graph"
    print("Writing graph to file....")
    nx.write_gml(graph, edgeListFile)
    print("done")
def reinforce(graph, path, posReinforce):
    """Adjust the "value" of graph nodes named by an article's "NN" entries.

    The file at ``path`` is run through ``blib.filterWords``; for every entry
    under the result's "NN" key that is also a node of ``graph``, the node's
    "value" attribute is incremented by one when ``posReinforce`` is truthy
    and decremented by one otherwise.  The graph is modified in place and
    nothing is returned.

    Entries that have no node in the graph, or whose node carries no "value"
    attribute, are skipped.

    NOTE(review): ``graph.node`` is the node-attribute mapping of older
    networkx releases; newer releases expose it as ``graph.nodes`` -- confirm
    against the installed version.
    """
    significant = blib.filterWords(path)
    delta = 1 if posReinforce else -1

    for noun in significant["NN"]:
        try:
            graph.node[noun]["value"] += delta
        except KeyError:
            # Unknown noun (or node without a "value"): nothing to reinforce.
            # Only KeyError is tolerated so that real failures are not
            # silently swallowed the way a bare ``except`` would.
            continue
def main():
    """Match one unmatched article against every known category graph.

    The article's file name is taken from the first command-line argument and
    looked up inside the ``unmatched/`` folder of the corpus.  Each graph
    listed in ``graphs`` is loaded with ``nx.read_gml`` and scored with
    ``findMatchRate`` against the article's "NN" entries.  Every score is
    printed, followed by the best-scoring category and the elapsed time in
    minutes.

    A category is only reported as the match when its rate is greater than
    zero; otherwise the reported name is empty and the rate is 0.

    Exits with a usage message when no article name is supplied.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s <article file name under unmatched/>" % sys.argv[0])
    articleName = sys.argv[1]

    # List all the available graphs and their names
    baseDir = "/Users/francis/Documents/cpe480_texts/"
    graphs = {
        "Basic Materials : Oil And Gas":
            baseDir + "basicmaterials/oilngasdrilling/oilngasdrilling.graph",
        "Basic Materials : Chemicals":
            baseDir + "basicmaterials/chemicals/chemicals.graph",
    }
    matchRate = {}

    currentArticleLoc = baseDir + "unmatched/" + articleName
    articleWords = blib.filterWords(currentArticleLoc)

    # Each message is formatted into ONE string before printing, so the call
    # emits the same text under both the Python 2 print statement and the
    # Python 3 print function.
    begin = time.time()
    print("Begin Matching...")
    for name, graphLoc in graphs.items():
        print("Matching %s graph..." % (name,))
        # Load the graph, score it, and remember the score under its name.
        graph = nx.read_gml(graphLoc)
        matchRate[name] = findMatchRate(graph, articleWords["NN"])

    # Find highest and print Match Rate
    highest = ("", 0)
    for name, rate in matchRate.items():
        # Strict comparison: on a tie the first category seen is kept.
        if rate > highest[1]:
            highest = (name, rate)
        print("%s : %s" % (name, rate))

    print("%s ..." % (articleName,))
    print("Matched to %s : Match Rate %s" % (highest[0], highest[1]))
    print("Time Elapsed: %s mins" % ((time.time() - begin) / 60,))