def main():
    """Run the MEDRank workflow end to end on the '5th' test data set.

    Steps, in order:
      1. Configure logging at INFO level with a timestamped format.
      2. Unpickle the chunkmap from ``test_data/5th.chunkmap.bz2`` and pass
         it through ``chunkmap_factory``.
      3. Wrap ``test_data/5th.semrep.out.bz2`` in a ``SemrepOutput`` reader,
         feed the reader to ``TF_IDF.build_tf_from_file`` and rewind it so
         the workflow can read it again.
      4. Build the co-occurrence graph builder and evaluation parameters,
         then run ``myWorkflow`` with its output going to ``OUTPUT_FILE``.

    Every file opened here is managed by a ``with`` block, so it is closed
    (and the output flushed) even if one of the steps raises.
    """
    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.INFO, format=my_format_string)

    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # point this at chunkmap files from a trusted source.
    with bz2.BZ2File('test_data/5th.chunkmap.bz2') as chunkmap_file:
        chunkmap = chunkmap_factory(pickle.load(chunkmap_file))

    # The semrep file must stay open until the workflow has finished,
    # because the reader is consumed again after the rewind.
    with bz2.BZ2File('test_data/5th.semrep.out.bz2') as semrep_file:
        semrep_reader = SemrepOutput(semrep_file, DEFAULT_LINES_TO_IGNORE,
                                     chunkmap)
        tfidf = TF_IDF(file_mode="c")
        tfidf.build_tf_from_file(semrep_reader)
        semrep_reader.rewind()

        semrep_grapher = SemrepCooccurrenceGraphBuilder(
            node_weight_threshold=0.001,
            link_weight_threshold=0.003,
            tf_idf_provider=tfidf)
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65
        ranker = TextRanker()

        # Open (and truncate) the output file only once everything else has
        # been built, and close it as soon as the run ends.
        with open(OUTPUT_FILE, 'w') as output_file:
            work = myWorkflow(
                semrep_reader, semrep_grapher, ranker, eval_params,
                PAGERANK_CUTOFF, MESH_TREE_FILE, SAVCC_MATRIX_FILE,
                # exp(-x)-style decay for 0 <= x < 5, zero elsewhere.
                lambda x: 1.0 / math.exp(x) if 0 <= x < 5 else 0.0,
                UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                output_file)
            work.run()
def main():
    """Drive a complete MEDRank run over the '5th' test data files.

    The function configures INFO-level logging, loads the pickled chunkmap
    from ``test_data/5th.chunkmap.bz2``, reads
    ``test_data/5th.semrep.out.bz2`` through a ``SemrepOutput`` reader
    (first to build term frequencies with ``TF_IDF``, then, after a rewind,
    as the workflow input), and finally runs ``myWorkflow`` writing to
    ``OUTPUT_FILE``.

    All three files are opened in ``with`` blocks so that they are closed
    deterministically, including when an exception interrupts the run.
    """
    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.INFO,
                        format=my_format_string)

    # NOTE: unpickling runs code embedded in the pickle; the chunkmap file
    # must come from a trusted source.
    with bz2.BZ2File('test_data/5th.chunkmap.bz2') as chunkmap_file:
        chunkmap = chunkmap_factory(pickle.load(chunkmap_file))

    # Keep the compressed semrep file open for the whole run: the reader is
    # rewound and read a second time by the workflow.
    with bz2.BZ2File('test_data/5th.semrep.out.bz2') as semrep_file:
        semrep_reader = SemrepOutput(semrep_file,
                                     DEFAULT_LINES_TO_IGNORE,
                                     chunkmap)
        tfidf = TF_IDF(file_mode="c")
        tfidf.build_tf_from_file(semrep_reader)
        semrep_reader.rewind()

        semrep_grapher = SemrepCooccurrenceGraphBuilder(
            node_weight_threshold=0.001,
            link_weight_threshold=0.003,
            tf_idf_provider=tfidf,
        )
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65
        ranker = TextRanker()

        # The output file is opened last and closed right after run().
        with open(OUTPUT_FILE, 'w') as output_file:
            work = myWorkflow(
                semrep_reader, semrep_grapher, ranker, eval_params,
                PAGERANK_CUTOFF, MESH_TREE_FILE, SAVCC_MATRIX_FILE,
                # 1/e**x inside [0, 5), and 0.0 outside that range.
                lambda x: 1.0 / math.exp(x) if 0 <= x < 5 else 0.0,
                UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                output_file)
            work.run()
Beispiel #3
0
 def __init__(self, fileobject, lines_to_ignore, chunkmap, converter):
     """Initialise the SemrepOutput base, then attach the converter.

     fileobject, lines_to_ignore and chunkmap are forwarded unchanged to
     SemrepOutput.__init__; converter is kept on the instance as
     _my_converter.
     """
     parent_options = {'lines_to_ignore': lines_to_ignore,
                       'chunkmap': chunkmap}
     SemrepOutput.__init__(self, fileobject, **parent_options)
     self._my_converter = converter
     # Use this object's own line_factory attribute as its line_type.
     self.line_type = self.line_factory
 def setUp(self):
     """Create self.sro: a SemrepOutput reading an in-memory fake file.

     The fake file holds two entity lines, two relation lines, one line
     that is listed in lines_to_ignore, and one text line.
     """
     # Test setup borrowed from semrep.py
     from MEDRank.file.semrep import (SemrepOutput)
     # NOTE(review): MetamapOutput is not referenced in this method; the
     # import is kept as-is in case it is wanted for its side effects.
     from MEDRank.file.metamap import (MetamapOutput)
     from MEDRank.file.chunkmap import chunkmap_factory
     import StringIO
     # logging.basicConfig(level=logging.DEBUG,
     #                  format='%(asctime)s %(levelname)s %(message)s')

     # This fake file is NOT the same as the one in semrep.py - this one
     # has a relationship that should not be part of the graph, and one
     # that should.
     entity_affecting = ("SE|0000000000||ti|2|entity|Affecting|"
                         "ftcn|C0392760|involved||||1000|319|326\n")
     entity_involvement = ("SE|0000000000||ti|2|entity|Involvement"
                           "with|ftcn|C1314939|involved||"
                           "||1000|319|326\n")
     relation_steroid = ("SE|0000000000||ti|2|relation|||Steroid"
                         "hormone|horm,strd|horm|C0301818|"
                         "||||||||901|115|130||INTERACTS_WITH"
                         "||379|385|||steroid hormone"
                         "receptor|gngm,aapp,rcpt|gngm|C0597519"
                         "||None|steroid hormone receptors|their"
                         " respective steroid hormone"
                         " receptors|||||890|390|431\n")
     relation_affection = ("SE|0000000000||ti|2|relation|||Affection"
                           "|horm,strd|horm|C0392760|"
                           "||||||||901|115|130||INTERACTS_WITH"
                           "||379|385|||Involvement"
                           "with|gngm,aapp,rcpt|gngm|C1314939"
                           "||None|steroid hormone receptors|their"
                           " respective steroid hormone"
                           " receptors|||||890|390|431\n")
     ignored_line = "USELESS LINE!\n"
     text_line = ("SE|0000000000||ti|3|text|Coactivator and"
                  " corepressor proteins have recently been"
                  " identified that interact with steroid"
                  " hormone receptors and modulate"
                  " transcriptional activation\n")

     # Same record order as the lines appear in the fake file.
     fake_contents = "".join((entity_affecting,
                              entity_involvement,
                              relation_steroid,
                              relation_affection,
                              ignored_line,
                              text_line))
     fake_file = StringIO.StringIO(fake_contents)
     fake_chunkmap = chunkmap_factory({'123.txt': [0]})
     self.sro = SemrepOutput(fake_file, ["USELESS LINE!"], fake_chunkmap)
Beispiel #5
0
 def __init__(self, fileobject, lines_to_ignore, chunkmap, converter):
     """Build the reader on top of SemrepOutput and remember converter.

     The first three arguments go straight to SemrepOutput.__init__
     (lines_to_ignore and chunkmap by keyword); converter is stored as
     the _my_converter attribute.
     """
     SemrepOutput.__init__(
         self,
         fileobject,
         chunkmap=chunkmap,
         lines_to_ignore=lines_to_ignore,
     )
     self._my_converter = converter
     # line_type is set to this instance's line_factory attribute.
     self.line_type = self.line_factory