def main():
    """Run the SemRep co-occurrence workflow on the '5th' test data set.

    Configures INFO-level logging, loads the pickled chunkmap, builds
    TF-IDF term frequencies from the SemRep output, rewinds the reader and
    then runs ``myWorkflow`` with an output file opened on ``OUTPUT_FILE``.

    Every file opened here is closed on exit, including when a step raises.
    """
    # Local import: only this function needs it.
    from contextlib import closing

    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.INFO, format=my_format_string)

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this path points at bundled test data -- never use untrusted input.
    with closing(bz2.BZ2File('test_data/5th.chunkmap.bz2')) as chunkmap_file:
        chunkmap = chunkmap_factory(pickle.load(chunkmap_file))

    # The reader keeps using the compressed stream until the workflow has
    # finished, so the stream stays open for the rest of the function.
    with closing(
            bz2.BZ2File('test_data/5th.semrep.out.bz2')) as semrep_file:
        semrep_reader = SemrepOutput(semrep_file, DEFAULT_LINES_TO_IGNORE,
                                     chunkmap)
        tfidf = TF_IDF(file_mode="c")
        tfidf.build_tf_from_file(semrep_reader)
        # Building the term frequencies consumed the reader; start over.
        semrep_reader.rewind()

        semrep_grapher = SemrepCooccurrenceGraphBuilder(
            node_weight_threshold=0.001,
            link_weight_threshold=0.003,
            tf_idf_provider=tfidf)
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65
        with open(OUTPUT_FILE, 'w') as output_file:
            work = myWorkflow(
                semrep_reader, semrep_grapher, TextRanker(), eval_params,
                PAGERANK_CUTOFF, MESH_TREE_FILE, SAVCC_MATRIX_FILE,
                # 1/exp(x) for 0 <= x < 5, otherwise 0.0
                lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0,
                UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                output_file)
            work.run()
def main():
    """Run the SemRep co-occurrence workflow on the '5th' test data set.

    Steps: set up INFO-level logging, unpickle the chunkmap, compute TF-IDF
    term frequencies from the SemRep output, rewind the reader, and run
    ``myWorkflow`` with an output file opened on ``OUTPUT_FILE``.

    All files opened by this function are closed before it returns, even
    if one of the steps raises.
    """
    # Function-scope import so the block does not rely on file-level edits.
    from contextlib import closing

    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.INFO,
                        format=my_format_string)

    # NOTE(review): unpickling runs code embedded in the file; acceptable
    # for bundled test data only.
    with closing(bz2.BZ2File('test_data/5th.chunkmap.bz2')) as pickled:
        chunkmap = chunkmap_factory(pickle.load(pickled))

    # Keep the SemRep stream open until the workflow is done with the
    # reader that wraps it.
    with closing(bz2.BZ2File('test_data/5th.semrep.out.bz2')) as semrep_in:
        semrep_reader = SemrepOutput(semrep_in,
                                     DEFAULT_LINES_TO_IGNORE,
                                     chunkmap)
        tfidf = TF_IDF(file_mode="c")
        tfidf.build_tf_from_file(semrep_reader)
        # The TF pass read through the reader; reset it for the workflow.
        semrep_reader.rewind()

        semrep_grapher = SemrepCooccurrenceGraphBuilder(
            node_weight_threshold=0.001,
            link_weight_threshold=0.003,
            tf_idf_provider=tfidf)
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65
        with open(OUTPUT_FILE, 'w') as results:
            work = myWorkflow(
                semrep_reader, semrep_grapher, TextRanker(), eval_params,
                PAGERANK_CUTOFF, MESH_TREE_FILE, SAVCC_MATRIX_FILE,
                # 1/exp(x) for 0 <= x < 5, otherwise 0.0
                lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0,
                UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                results)
            work.run()
Beispiel #3
0
 def setUp(self):
     """Create an in-memory SemRep-style file and a one-entry chunkmap.

     Sets ``self.fakefile`` (a StringIO over pipe-delimited sample text)
     and ``self.fake_chunkmap`` (``chunkmap_factory({'123.txt': [0]})``).
     """
     import StringIO
     from MEDRank.file.chunkmap import chunkmap_factory
     # The sample comes from real SEMREP output but was cut down to keep
     # the fixture small, so it does not make sense as a whole.
     # NOTE(review): some adjacent literals join without a space (e.g.
     # "Involvement" + "with"); reproduced exactly -- confirm intended.
     semrep_text = (
         "SE|0000000000||ti|2|entity|Affecting|"
         "ftcn|C0392760|involved||||1000|319|326\n"
         "SE|0000000000||ti|2|entity|Involvement"
         "with|ftcn|C1314939|involved||"
         "||1000|319|326\n"
         "SE|0000000000||ti|2|relation|||Steroid"
         "hormone|horm,strd|horm|C0301818|"
         "||||||||901|115|130||INTERACTS_WITH"
         "||379|385|||steroid hormone"
         "receptor|gngm,aapp,rcpt|gngm|C0597519"
         "||None|steroid hormone receptors|their"
         " respective steroid hormone"
         " receptors|||||890|390|431\n"
         "USELESS LINE!\n"
         "SE|0000000000||ti|3|text|Coactivator and"
         " corepressor proteins have recently been"
         " identified that interact with steroid"
         " hormone receptors and modulate"
         " transcriptional activation\n"
     )
     self.fakefile = StringIO.StringIO(semrep_text)
     self.fake_chunkmap = chunkmap_factory({'123.txt': [0]})
Beispiel #4
0
 def setUp(self):
     """Create a MetamapOutput over an in-memory MMI sample.

     Sets ``self.fakefile``, ``self.lines_to_ignore``,
     ``self.fake_chunkmap`` and ``self.mo`` (the MetamapOutput built from
     the other three).
     """
     import StringIO
     from MEDRank.file.chunkmap import chunkmap_factory
     # Data from an actual file, with a few additions. The leading
     # whitespace on every line is part of the fixture.
     mmi_text = """
     >>>>> MMI
     * ERROR *
     0000000000|MM|530|Carboxyhemoglobin|C0007061| ["Carboxyhaemoglobin"-ti-1-"Carboxyhaemoglobin"]|TI
     0000000000|MM|223|Male population group|C0025266|["Men"-ti-1-"men"]|TI
     0000000000|MM|121|Levels|C0441889|["Levels"-ti-1-"levels"]|TI
     0000000000|MM|114|British|C0596227|["British"-ti-1-"British"]|TI
     0000000000|MM|114|Old|C0580836|["Old"-ti-1-"older"]|TI
     0000000000|MM|114|Old episode|C0677546|["Old"-ti-1-"older"]|TI
     <<<<< MMI
     >>>>> MMI
     0000000001|MM|585|Little's Disease|C0023882|["Little"-ti-1-"little"]|TI
     0000000001|MM|424|HAC protocol|C0062074|["HAD"-ti-1-"has"]|TI
     0000000001|MM|170|Background|C1706907|["Background"-ti-1-"BACKGROUND"]|TI
     0000000001|MM|170|General Population|C0683971|["General Population"-ti-1-"general population"]|TI
     0000000001|MM|170|Small|C0700321|["Little"-ti-1-"little"]|TI
     0000000001|MM|124|Levels|C0441889|["Levels"-ti-1-"levels"]|TI
     0000000001|MM|121|Exposure to|C0332157|["Exposure"-ti-1-"exposure"]|TI
     0000000001|MM|121|Injury due to exposure to external cause|C0274281|["Exposure, NOS"-ti-1-"exposure"]|TI
     0000000001|MM|121|Persons|C0027361|["People"-ti-1-"people"]|TI
     0000000001|MM|114|Old|C0580836|["Old"-ti-1-"older"]|TI
     0000000001|MM|114|Old episode|C0677546|["Old"-ti-1-"older"]|TI
     0000000001|MM|114|Known|C0205309|["Known"-ti-1-"known"]|TI
     <<<<< MMI
     """
     self.fakefile = StringIO.StringIO(mmi_text)
     # NOTE(review): the data contains "* ERROR *" but the entry here is
     # lower-case; presumably matching is case-insensitive -- confirm.
     self.lines_to_ignore = [">>>>> MMI", "<<<<< MMI", "* error *"]
     chunk_spec = {'123.txt': [0],
                   '345.txt': [1]}
     self.fake_chunkmap = chunkmap_factory(chunk_spec)
     self.mo = MetamapOutput(self.fakefile,
                             self.lines_to_ignore,
                             self.fake_chunkmap)
Beispiel #5
0
 def setUp(self):
     """Prepare a small fake SemRep output file and its chunkmap.

     ``self.fakefile`` is a StringIO holding the sample text and
     ``self.fake_chunkmap`` is ``chunkmap_factory({'123.txt': [0]})``.
     """
     import StringIO
     from MEDRank.file.chunkmap import chunkmap_factory
     # Lines lifted from genuine SEMREP output; the excerpt is kept tiny
     # on purpose, so it is not meaningful as a whole.
     # NOTE(review): a few neighbouring literals concatenate without a
     # space ("Steroid" + "hormone", ...); left untouched -- verify.
     fragments = (
         "SE|0000000000||ti|2|entity|Affecting|"
         "ftcn|C0392760|involved||||1000|319|326\n"
         "SE|0000000000||ti|2|entity|Involvement"
         "with|ftcn|C1314939|involved||"
         "||1000|319|326\n"
         "SE|0000000000||ti|2|relation|||Steroid"
         "hormone|horm,strd|horm|C0301818|"
         "||||||||901|115|130||INTERACTS_WITH"
         "||379|385|||steroid hormone"
         "receptor|gngm,aapp,rcpt|gngm|C0597519"
         "||None|steroid hormone receptors|their"
         " respective steroid hormone"
         " receptors|||||890|390|431\n"
         "USELESS LINE!\n"
         "SE|0000000000||ti|3|text|Coactivator and"
         " corepressor proteins have recently been"
         " identified that interact with steroid"
         " hormone receptors and modulate"
         " transcriptional activation\n"
     )
     self.fakefile = StringIO.StringIO(fragments)
     chunk_spec = {'123.txt': [0]}
     self.fake_chunkmap = chunkmap_factory(chunk_spec)
Beispiel #6
0
 def testChunkedOutput(self):
     """Iterating a ChunkedNLMOutput yields one set per chunkmap entry.

     With a chunkmap of '1.txt' -> [12345] and '2.txt' -> [56789, 56790],
     the test expects two sets holding one and two lines respectively,
     set ids Pmid(1) and Pmid(2), and 56790 as the id of the last line.
     """
     from MEDRank.file.chunkmap import chunkmap_factory
     from MEDRank.pubmed.pmid import Pmid
     cm = chunkmap_factory({'1.txt': [12345], '2.txt': [56789, 56790]})
     cno = ChunkedNLMOutput(self.fakefile, Line, self.lines_to_skip, cm)
     processed_sets = list(cno)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(processed_sets), 2)
     self.assertEqual(len(processed_sets[0].lines), 1)
     self.assertEqual(len(processed_sets[1].lines), 2)
     self.assertEqual(processed_sets[0].set_id, Pmid(1))
     self.assertEqual(processed_sets[1].set_id, Pmid(2))
     self.assertEqual(processed_sets[1].lines[1].line_id, 56790)
Beispiel #7
0
def main():
    """Run the MTI workflow on the 'all_abstracts' test data set.

    Configures DEBUG-level logging, loads the pickled MTI chunkmap, wraps
    the MTI output in an ``MtiOutput`` reader and runs ``MtiWorkflow`` with
    an output file opened on ``OUTPUT_FILE``.

    Every file opened here is closed on exit, including when a step raises.
    """
    # Local import: only this function needs it.
    from contextlib import closing

    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.DEBUG, format=my_format_string)

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this path points at bundled test data -- never use untrusted input.
    with closing(bz2.BZ2File(
            'test_data/all_abstracts.mti_chunkmap.bz2')) as chunkmap_file:
        chunkmap = chunkmap_factory(pickle.load(chunkmap_file))

    # The reader uses the compressed stream while the workflow runs, so
    # the stream stays open until work.run() has returned.
    with closing(bz2.BZ2File(
            'test_data/all_abstracts.mti.just_metamap.out.bz2')) as mti_file:
        reader = MtiOutput(mti_file, DEFAULT_LINES_TO_IGNORE, chunkmap)
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65
        with open(OUTPUT_FILE, 'w') as output_file:
            work = MtiWorkflow(
                reader, None, None, eval_params, PAGERANK_CUTOFF,
                MESH_TREE_FILE, SAVCC_MATRIX_FILE,
                # 1/exp(x) for 0 <= x < 5, otherwise 0.0
                lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0,
                UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                output_file)
            work.run()
Beispiel #8
0
def main():
    """Run the MTI workflow on the 'all_abstracts' test data set.

    Steps: set up DEBUG-level logging, unpickle the MTI chunkmap, build an
    ``MtiOutput`` reader over the MTI output and run ``MtiWorkflow`` with
    an output file opened on ``OUTPUT_FILE``.

    All files opened by this function are closed before it returns, even
    if one of the steps raises.
    """
    # Function-scope import so the block does not rely on file-level edits.
    from contextlib import closing

    my_format_string = ('%(asctime)s %(levelname)s %(module)s.'
                        '%(funcName)s: %(message)s')
    logging.basicConfig(level=logging.DEBUG,
                        format=my_format_string)

    # NOTE(review): unpickling runs code embedded in the file; acceptable
    # for bundled test data only.
    with closing(bz2.BZ2File(
            'test_data/all_abstracts.mti_chunkmap.bz2')) as pickled:
        chunkmap = chunkmap_factory(pickle.load(pickled))

    # Keep the MTI stream open until the workflow is done with the reader
    # that wraps it.
    with closing(bz2.BZ2File(
            'test_data/all_abstracts.mti.just_metamap.out.bz2')) as mti_in:
        reader = MtiOutput(mti_in,
                           DEFAULT_LINES_TO_IGNORE,
                           chunkmap)
        eval_params = EvaluationParameters()
        eval_params.alpha = 0.65
        with open(OUTPUT_FILE, 'w') as results:
            work = MtiWorkflow(
                reader, None, None, eval_params,
                PAGERANK_CUTOFF, MESH_TREE_FILE, SAVCC_MATRIX_FILE,
                # 1/exp(x) for 0 <= x < 5, otherwise 0.0
                lambda x: 1.0 / math.exp(x) if x >= 0 and x < 5 else 0.0,
                UMLS_CONVERTER_DATA, UMLS_CONCEPT_DATA,
                results)
            work.run()
Beispiel #9
0
 def setUp(self):
     """Create ``self.sro``, a SemrepOutput over an in-memory sample.

     The sample holds two relation records; "USELESS LINE!" is passed as
     the only line to ignore, and the chunkmap is built from
     ``{'123.txt': [0]}``.
     """
     # Test setup borrowed from semrep.py
     from MEDRank.file.semrep import SemrepOutput
     from MEDRank.file.metamap import MetamapOutput
     from MEDRank.file.chunkmap import chunkmap_factory
     import StringIO
     # logging.basicConfig(level=logging.DEBUG,
     #                  format='%(asctime)s %(levelname)s %(message)s')
     # Unlike the semrep.py fixture, this text carries one relationship
     # that should not end up in the graph and one that should.
     # NOTE(review): some adjacent literals join without a space (e.g.
     # "Involvement" + "with"); reproduced exactly -- confirm intended.
     semrep_text = (
         "SE|0000000000||ti|2|entity|Affecting|"
         "ftcn|C0392760|involved||||1000|319|326\n"
         "SE|0000000000||ti|2|entity|Involvement"
         "with|ftcn|C1314939|involved||"
         "||1000|319|326\n"
         "SE|0000000000||ti|2|relation|||Steroid"
         "hormone|horm,strd|horm|C0301818|"
         "||||||||901|115|130||INTERACTS_WITH"
         "||379|385|||steroid hormone"
         "receptor|gngm,aapp,rcpt|gngm|C0597519"
         "||None|steroid hormone receptors|their"
         " respective steroid hormone"
         " receptors|||||890|390|431\n"
         "SE|0000000000||ti|2|relation|||Affection"
         "|horm,strd|horm|C0392760|"
         "||||||||901|115|130||INTERACTS_WITH"
         "||379|385|||Involvement"
         "with|gngm,aapp,rcpt|gngm|C1314939"
         "||None|steroid hormone receptors|their"
         " respective steroid hormone"
         " receptors|||||890|390|431\n"
         "USELESS LINE!\n"
         "SE|0000000000||ti|3|text|Coactivator and"
         " corepressor proteins have recently been"
         " identified that interact with steroid"
         " hormone receptors and modulate"
         " transcriptional activation\n"
     )
     sr_stream = StringIO.StringIO(semrep_text)
     chunks = chunkmap_factory({'123.txt': [0]})
     ignored_lines = ["USELESS LINE!"]
     self.sro = SemrepOutput(sr_stream, ignored_lines, chunks)
Beispiel #10
0
 def setUp(self):
     """Build the SemrepOutput under test and store it as ``self.sro``.

     The reader wraps an in-memory sample with two relation records,
     ignores "USELESS LINE!", and uses a chunkmap made from
     ``{'123.txt': [0]}``.
     """
     # Test setup borrowed from semrep.py
     from MEDRank.file.semrep import SemrepOutput
     from MEDRank.file.metamap import MetamapOutput
     from MEDRank.file.chunkmap import chunkmap_factory
     import StringIO
     # logging.basicConfig(level=logging.DEBUG,
     #                  format='%(asctime)s %(levelname)s %(message)s')
     # This sample differs from the one in semrep.py: it includes one
     # relationship that should be in the graph and one that should not.
     # NOTE(review): a few neighbouring literals concatenate without a
     # space ("Steroid" + "hormone", ...); left untouched -- verify.
     fragments = (
         "SE|0000000000||ti|2|entity|Affecting|"
         "ftcn|C0392760|involved||||1000|319|326\n"
         "SE|0000000000||ti|2|entity|Involvement"
         "with|ftcn|C1314939|involved||"
         "||1000|319|326\n"
         "SE|0000000000||ti|2|relation|||Steroid"
         "hormone|horm,strd|horm|C0301818|"
         "||||||||901|115|130||INTERACTS_WITH"
         "||379|385|||steroid hormone"
         "receptor|gngm,aapp,rcpt|gngm|C0597519"
         "||None|steroid hormone receptors|their"
         " respective steroid hormone"
         " receptors|||||890|390|431\n"
         "SE|0000000000||ti|2|relation|||Affection"
         "|horm,strd|horm|C0392760|"
         "||||||||901|115|130||INTERACTS_WITH"
         "||379|385|||Involvement"
         "with|gngm,aapp,rcpt|gngm|C1314939"
         "||None|steroid hormone receptors|their"
         " respective steroid hormone"
         " receptors|||||890|390|431\n"
         "USELESS LINE!\n"
         "SE|0000000000||ti|3|text|Coactivator and"
         " corepressor proteins have recently been"
         " identified that interact with steroid"
         " hormone receptors and modulate"
         " transcriptional activation\n"
     )
     sample_file = StringIO.StringIO(fragments)
     sample_chunkmap = chunkmap_factory({'123.txt': [0]})
     self.sro = SemrepOutput(sample_file,
                             ["USELESS LINE!"],
                             sample_chunkmap)
Beispiel #11
0
 def setUp(self):
     """Create an in-memory MTI-style sample file and its chunkmap.

     Sets ``self.fakefile`` (a StringIO over the sample text) and
     ``self.fake_chunkmap`` (``chunkmap_factory({16606471: 16606471})``).
     """
     import StringIO
     from MEDRank.file.chunkmap import chunkmap_factory
     #logging.basicConfig(level=logging.INFO,
     #                  format='%(asctime)s %(levelname)s %(message)s')

     # Data from an actual file, with a few additions. The leading
     # whitespace on every line is part of the fixture.
     mti_text = """
     16606471|*Pancreatitis, Acute Necrotizing|C0267941|46152|MH|RtM via: Pancreatitis, Acute Necrotizing|TI;AB|MM;RC
     16606471|*Randomized Controlled Trials as Topic|C0282440|19056|MH|RtM via: Randomized Controlled Trials as Topic|TI;AB|MM;RC
     16606471|Pancreatitis|C0030305|6224|MH|RtM via: Pancreatitis|AB|MM;RC
     16606471|Multicenter Studies as Topic|C0282439|4703|MH|||RC
     16606471|Sample Size|C0242618|3713|MH|RtM via: Sample Size|AB|MM;RC
     16606471|Double-Blind Method|C0013072|3524|MH|||RC
     16606471|*Clinical Trials as Topic|C0008976|2835|MH|RtM via: Clinical Trials|TI|MM
     16606471|Debridement|C0011079|1457|MH|RtM via: Debridement|AB|MM;RC
     16606471|Pancreas|C0030274|1387|MH|||RC
     16606471|Drainage|C0013103|1228|MH|RtM via: Drainage procedure|AB|MM;RC
     ------
     16606471|Patient Selection|C0242802|918|MH|||RC
     16606471|Laparotomy|C0023038|867|MH|RtM via: Laparotomy|AB|MM;RC
     """
     self.fakefile = StringIO.StringIO(mti_text)
     # NOTE(review): the id maps to a bare int here, not a list --
     # confirm chunkmap_factory accepts this shape.
     chunk_spec = {16606471: 16606471}
     self.fake_chunkmap = chunkmap_factory(chunk_spec)
Beispiel #12
0
    def setUp(self):
        """Prepare a fake MTI output file and the matching chunkmap.

        ``self.fakefile`` is a StringIO holding the sample text and
        ``self.fake_chunkmap`` is ``chunkmap_factory({16606471: 16606471})``.
        """
        import StringIO
        from MEDRank.file.chunkmap import chunkmap_factory
        #logging.basicConfig(level=logging.INFO,
        #                  format='%(asctime)s %(levelname)s %(message)s')

        # Data from an actual file, with a few additions. Each line's
        # leading whitespace belongs to the fixture text.
        sample_text = """
        16606471|*Pancreatitis, Acute Necrotizing|C0267941|46152|MH|RtM via: Pancreatitis, Acute Necrotizing|TI;AB|MM;RC
        16606471|*Randomized Controlled Trials as Topic|C0282440|19056|MH|RtM via: Randomized Controlled Trials as Topic|TI;AB|MM;RC
        16606471|Pancreatitis|C0030305|6224|MH|RtM via: Pancreatitis|AB|MM;RC
        16606471|Multicenter Studies as Topic|C0282439|4703|MH|||RC
        16606471|Sample Size|C0242618|3713|MH|RtM via: Sample Size|AB|MM;RC
        16606471|Double-Blind Method|C0013072|3524|MH|||RC
        16606471|*Clinical Trials as Topic|C0008976|2835|MH|RtM via: Clinical Trials|TI|MM
        16606471|Debridement|C0011079|1457|MH|RtM via: Debridement|AB|MM;RC
        16606471|Pancreas|C0030274|1387|MH|||RC
        16606471|Drainage|C0013103|1228|MH|RtM via: Drainage procedure|AB|MM;RC
        ------
        16606471|Patient Selection|C0242802|918|MH|||RC
        16606471|Laparotomy|C0023038|867|MH|RtM via: Laparotomy|AB|MM;RC
        """
        self.fakefile = StringIO.StringIO(sample_text)
        # NOTE(review): the value is a bare int rather than a list --
        # confirm chunkmap_factory accepts this shape.
        pmid_map = {16606471: 16606471}
        self.fake_chunkmap = chunkmap_factory(pmid_map)