def main():
    """Entry point: wire up the SemRep co-occurrence pipeline and run it."""
    log_fmt = ('%(asctime)s %(levelname)s %(module)s.'
               '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.INFO, format=log_fmt)

    # Recover the chunkmap that ties SemRep output chunks to article sets.
    chunkmap = chunkmap_factory(
        pickle.load(bz2.BZ2File('test_data/5th.chunkmap.bz2')))
    semrep_reader = SemrepOutput(
        bz2.BZ2File('test_data/5th.semrep.out.bz2'),
        DEFAULT_LINES_TO_IGNORE, chunkmap)

    # First pass over the corpus builds term frequencies; rewind so the
    # workflow can read the articles again from the beginning.
    tfidf = TF_IDF(file_mode="c")
    tfidf.build_tf_from_file(semrep_reader)
    semrep_reader.rewind()

    semrep_grapher = SemrepCooccurrenceGraphBuilder(
        node_weight_threshold=0.001,
        link_weight_threshold=0.003,
        tf_idf_provider=tfidf)
    eval_params = EvaluationParameters()
    eval_params.alpha = 0.65

    # Distance-weighting function: exp(-x) for 0 <= x < 5, zero elsewhere.
    weight_function = lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0
    work = myWorkflow(semrep_reader, semrep_grapher, TextRanker(),
                      eval_params, PAGERANK_CUTOFF, MESH_TREE_FILE,
                      SAVCC_MATRIX_FILE, weight_function,
                      UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                      open(OUTPUT_FILE, 'w'))
    work.run()
def main(): logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') chunkmap=chunkmap_factory(pickle.load( bz2.BZ2File('test_data/5th.chunkmap.bz2'))) setupstart=time.clock() semrep_reader=SemrepOutput(bz2.BZ2File('test_data/5th.semrep.out.bz2'), ["semrep wrapper error"], chunkmap) semrep_grapher=SemrepGraphBuilder() pr_algorithm=PageRanker() count=0 parsestart=time.clock() for article in semrep_reader: print "Read article", article.set_id, graph=semrep_grapher.create_graph(article.lines) print "graphed it", matrixed=graph.as_mapped_link_matrix() print "matrixed it,", fake_e_vector=[0.0] * len(matrixed) if fake_e_vector==[]: print "didn't pagerank because it was empty." else: ranked=pr_algorithm.evaluate(matrixed, fake_e_vector) print "pageranked it. Stats:", pr_algorithm.stats count+=1 endparse=time.clock() print "Total time elapsed: %1.3f seconds (%1.7f seconds were setup) for "\ "%d total articles, for a grand total of %1.3f compressed "\ "articles/second turned into semantic graphs, link matrices," \ " and finally pageranked." % ( endparse-setupstart, parsestart-setupstart, count, float(count)/(endparse-setupstart))
def main():
    """Set up and execute the SemRep co-occurrence ranking workflow."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(module)s.'
               '%(funcName)s: %(message)s')
    # The pickled chunkmap maps SemRep output chunks back to article sets.
    reader_chunkmap = chunkmap_factory(
        pickle.load(bz2.BZ2File('test_data/5th.chunkmap.bz2')))
    reader = SemrepOutput(bz2.BZ2File('test_data/5th.semrep.out.bz2'),
                          DEFAULT_LINES_TO_IGNORE, reader_chunkmap)
    # Build term frequencies in a first pass, then rewind for the real run.
    frequencies = TF_IDF(file_mode="c")
    frequencies.build_tf_from_file(reader)
    reader.rewind()
    grapher = SemrepCooccurrenceGraphBuilder(
        node_weight_threshold=0.001,
        link_weight_threshold=0.003,
        tf_idf_provider=frequencies)
    params = EvaluationParameters()
    params.alpha = 0.65
    work = myWorkflow(
        reader, grapher, TextRanker(), params, PAGERANK_CUTOFF,
        MESH_TREE_FILE, SAVCC_MATRIX_FILE,
        # exp(-x) falloff, truncated to the interval [0, 5)
        lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0,
        UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA, open(OUTPUT_FILE, 'w'))
    work.run()
def main(): logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') chunkmap = chunkmap_factory( pickle.load(bz2.BZ2File('test_data/5th.chunkmap.bz2'))) setupstart = time.clock() semrep_reader = SemrepOutput(bz2.BZ2File('test_data/5th.semrep.out.bz2'), ["semrep wrapper error"], chunkmap) semrep_grapher = SemrepGraphBuilder() pr_algorithm = PageRanker() count = 0 parsestart = time.clock() for article in semrep_reader: print "Read article", article.set_id, graph = semrep_grapher.create_graph(article.lines) print "graphed it", matrixed = graph.as_mapped_link_matrix() print "matrixed it,", fake_e_vector = [0.0] * len(matrixed) if fake_e_vector == []: print "didn't pagerank because it was empty." else: ranked = pr_algorithm.evaluate(matrixed, fake_e_vector) print "pageranked it. Stats:", pr_algorithm.stats count += 1 endparse = time.clock() print "Total time elapsed: %1.3f seconds (%1.7f seconds were setup) for "\ "%d total articles, for a grand total of %1.3f compressed "\ "articles/second turned into semantic graphs, link matrices," \ " and finally pageranked." % ( endparse-setupstart, parsestart-setupstart, count, float(count)/(endparse-setupstart))
def main():
    """Run the MTI workflow over the all-abstracts MetaMap test data."""
    fmt = ('%(asctime)s %(levelname)s %(module)s.'
           '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.DEBUG, format=fmt)
    chunkmap = chunkmap_factory(pickle.load(
        bz2.BZ2File('test_data/all_abstracts.mti_chunkmap.bz2')))
    reader = MtiOutput(
        bz2.BZ2File('test_data/all_abstracts.mti.just_metamap.out.bz2'),
        DEFAULT_LINES_TO_IGNORE, chunkmap)
    eval_params = EvaluationParameters()
    eval_params.alpha = 0.65
    # Distance weight: exp(-x) on [0, 5), zero everywhere else.
    distance_weight = lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0
    # Grapher and ranker slots are passed as None here — presumably the MTI
    # workflow supplies its own; NOTE(review): confirm against MtiWorkflow.
    work = MtiWorkflow(reader, None, None, eval_params, PAGERANK_CUTOFF,
                       MESH_TREE_FILE, SAVCC_MATRIX_FILE, distance_weight,
                       UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                       open(OUTPUT_FILE, 'w'))
    work.run()
def main(): logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') chunkmap=chunkmap_factory(pickle.load( bz2.BZ2File( 'test_data/metamap.chunkmap.bz2'))) setupstart=time.clock() metamap_reader=MetamapOutput(bz2.BZ2File( 'test_data/metamap.out.bz2'), DEFAULT_LINES_TO_IGNORE, chunkmap) grapher=MetamapCoccurrenceGraphBuilder() # PageRank is not the correct algorithm for a matrix of adirectional nodes # but it'll do for now, to exercise the system # pr_algorithm=PageRanker() # TextRanker now written pr_algorithm=MappedRanker(TextRanker()) count=0 parsestart=time.clock() for article in metamap_reader: print "Read article", article.set_id, graph=grapher.create_graph(article.lines) print "graphed it", matrix=graph.as_mapped_link_matrix() print "turned it into a", matrix, #fake_e_vector=[0.0] * len(matrix) if len(matrix)==0: print "didn't pagerank because it was empty." else: ranked=pr_algorithm.evaluate(matrix) print "TextRanked it. First results: %r Stats:" % \ [x for x in ranked][:5], pr_algorithm.stats count+=1 endparse=time.clock() print "Total time elapsed: %1.3f seconds (%1.7f seconds were setup) "\ "for %d total articles, for a grand total of %1.3f compressed "\ "articles/second read, turned into link matrices, and " \ " pageranked." \ % (endparse-setupstart, parsestart-setupstart, count, float(count)/(endparse-setupstart)) count+=1
def main():
    """Entry point: drive MtiWorkflow over the just_metamap test corpus."""
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s %(module)s.'
               '%(funcName)s: %(message)s')
    # Load the chunkmap linking MTI output chunks to their article sets.
    article_chunkmap = chunkmap_factory(pickle.load(
        bz2.BZ2File('test_data/all_abstracts.mti_chunkmap.bz2')))
    mti_reader = MtiOutput(
        bz2.BZ2File('test_data/all_abstracts.mti.just_metamap.out.bz2'),
        DEFAULT_LINES_TO_IGNORE, article_chunkmap)
    evaluation = EvaluationParameters()
    evaluation.alpha = 0.65
    workflow = MtiWorkflow(
        mti_reader, None, None, evaluation, PAGERANK_CUTOFF,
        MESH_TREE_FILE, SAVCC_MATRIX_FILE,
        # exp(-x) falloff restricted to [0, 5)
        lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0,
        UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA, open(OUTPUT_FILE, 'w'))
    workflow.run()
def main(): logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') chunkmap = chunkmap_factory( pickle.load(bz2.BZ2File('test_data/metamap.chunkmap.bz2'))) setupstart = time.clock() metamap_reader = MetamapOutput(bz2.BZ2File('test_data/metamap.out.bz2'), DEFAULT_LINES_TO_IGNORE, chunkmap) grapher = MetamapCoccurrenceGraphBuilder() # PageRank is not the correct algorithm for a matrix of adirectional nodes # but it'll do for now, to exercise the system # pr_algorithm=PageRanker() # TextRanker now written pr_algorithm = MappedRanker(TextRanker()) count = 0 parsestart = time.clock() for article in metamap_reader: print "Read article", article.set_id, graph = grapher.create_graph(article.lines) print "graphed it", matrix = graph.as_mapped_link_matrix() print "turned it into a", matrix, #fake_e_vector=[0.0] * len(matrix) if len(matrix) == 0: print "didn't pagerank because it was empty." else: ranked = pr_algorithm.evaluate(matrix) print "TextRanked it. First results: %r Stats:" % \ [x for x in ranked][:5], pr_algorithm.stats count += 1 endparse = time.clock() print "Total time elapsed: %1.3f seconds (%1.7f seconds were setup) "\ "for %d total articles, for a grand total of %1.3f compressed "\ "articles/second read, turned into link matrices, and " \ " pageranked." \ % (endparse-setupstart, parsestart-setupstart, count, float(count)/(endparse-setupstart)) count += 1