Exemple #1
0
def test_journal_plots_longjournal():
    '''
    Checks that ReportSummary.journal_plots() hands back three truthy
    figures when one of the two articles has a very long journal name.
    '''
    long_journal = "THIS IS A VERY VERY LONG JOURNAL THAT WILL HAUNT PPAXE AND MATPLOTLIB AGAIN AND AGAIN"
    short_journal = "BMC GENOMICS"
    first_text = """
    Sak binds to p53 , and studies are underway to provide a molecular context for the Sak-p53 interaction.
    By coimmunoprecipitation coupled with mass spectrometry, we demonstrate that AHNAK interacts with dysferlin.
    """
    second_text = """
    Here we show that KLF4 physically interacts with STAT3 upon cytokine-induced phosphorylation of tyrosine 705 ( Y705 ) on STAT3.
    In this study , we report the Grb7 protein interacts with Filamin-a , an actin-crosslinking component of the cell cytoskeleton.
    """

    articles = [
        core.Article(pmid="1234", fulltext=first_text, journal=long_journal, year="2009"),
        core.Article(pmid="4321", fulltext=second_text, journal=short_journal, year="2016"),
    ]
    for art in articles:
        art.predict_interactions()

    # Unpacking doubles as a check that exactly three figures come back
    fig1, fig2, fig3 = report.ReportSummary(articles).journal_plots()
    assert all((fig1, fig2, fig3))
Exemple #2
0
def test_prot_count():
    '''
    Checks the values at dense-feature positions 112 to 115 for the second
    and for the last candidate of a sentence that repeats its proteins.
    '''
    fulltext = "MAPK13 seems to be directly correlated with MAPK12, which would mean that MAPK13 depends on the expression of MAPK13 and MAPK12."
    art = core.Article(pmid="1234", fulltext=fulltext)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        second = sent.candidates[1]
        second.compute_features()
        last = sent.candidates[-1]
        last.compute_features()

        # Original author's notes on the expected values:
        #   [MAPK13] seems to be directly correlated with MAPK12, which
        #   would mean that [MAPK13]
        #       - MAPK13 only appears two times
        #       - etc.
        #   (...) [MAPK13] and [MAPK12].
        #       - MAPK12 appears 2 times
        #       - etc.
        assert second.features_todense()[112] == 2
        assert second.features_todense()[113] == 2
        assert second.features_todense()[114] == 3
        assert second.features_todense()[115] == 3
        assert last.features_todense()[112] == 1
        assert last.features_todense()[113] == 1
        assert last.features_todense()[114] == 3
        assert last.features_todense()[115] == 2
Exemple #3
0
def test_number():
    '''
    Gives Article a non-string fulltext (the integer 1), then extracts and
    annotates its sentences. According to the original note this targets
    what gets passed to StanfordCoreNLP. There are no assertions: the test
    only requires that nothing raises.
    '''
    not_a_string = 1
    art = core.Article(pmid="1234", fulltext=not_a_string)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
0
def analyze_article(text):
    '''
    Shared helper for tests: wraps text in an Article (pmid "1234"),
    extracts its sentences, then annotates every sentence and collects
    its candidates. Returns the resulting Article.
    '''
    article = core.Article(pmid="1234", fulltext=text)
    article.extract_sentences()
    for sent in article.sentences:
        sent.annotate()
        sent.get_candidates()
    return article
Exemple #5
0
def test_decode_html():
    '''
    Tests decoding of HTML entities before annotation.

    The raw text carries the entities &lt; and &gt;. After
    extract_sentences() the first sentence's originaltext is expected to
    hold the literal '<' and '>' characters instead. Feeding text without
    any entity would make the check pass even if nothing were decoded.
    '''
    raw = '10-year risk &lt; 10% (I),10 - year risk=%10 - 20 (II), and 10-year risk &gt; 20% (III).'
    decoded = '10-year risk < 10% (I),10 - year risk=%10 - 20 (II), and 10-year risk > 20% (III).'
    article = core.Article(pmid="1234", fulltext=raw)
    article.extract_sentences()
    assert article.sentences[0].originaltext == decoded
Exemple #6
0
def test_get_pos_annotation():
    '''
    Checks the comma-joined POS tags returned by the private
    __get_token_pos() method of the first candidate in "between" mode.
    '''
    expected_tags = "NN,VBZ,TO,VB,IN,NN"
    art = core.Article(pmid="1234", fulltext="The protein MAPK14 seems to interact with MAPK12.")
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        # The method is name-mangled, hence the _InteractionCandidate prefix
        tags = sent.candidates[0]._InteractionCandidate__get_token_pos(mode="between")
        assert tags == expected_tags
Exemple #7
0
def test_candidates_multiple_sentences():
    '''
    Checks candidate retrieval when a single string holds several
    sentences: the very last candidate must have PROT99 as second protein.
    '''
    fulltext = "In patients with a complete response to PROT4  , the levels of PROT2  were higher at 24 weeks following PROT4  treatment than that of pre - treatment ( P = 0.04 ) , and the levels of PROT3  decreased markedly at 12 and 24 weeks ( P = 0.02 , 0.03 , respectively ) . mRNA expression positively correlated with the level of PROT55 / Th2 type cytokines in the PROT99 ."
    art = core.Article(pmid="1234", fulltext=fulltext)
    art.extract_sentences()
    collected = []
    for sent in art.sentences:
        sent.get_candidates()
        collected += sent.candidates
    assert collected[-1].prot2.symbol == "PROT99"
Exemple #8
0
def test_prediction():
    '''
    Runs feature computation and prediction on the first candidate of a
    one-sentence article and checks that its votes value is 0.738.
    '''
    art = core.Article(pmid="1234", fulltext="PROT12 interacts with PROT2.")
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        first = sent.candidates[0]
        first.compute_features()
        first.predict()
        assert first.votes == 0.738
Exemple #9
0
def test_get_proteins():
    '''
    Checks the string form of the first candidate retrieved from a sentence.
    '''
    expected = "[MAPK] may interact with [chloroacetate esterase]"
    fulltext = "However, MAPK is a better target for chloroacetate esterase which is an essential protein for cryoglobulin."
    art = core.Article(pmid="1234", fulltext=fulltext)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        assert str(sent.candidates[0]) == expected
Exemple #10
0
def test_predictions_method_empty():
    """
    Checks that Article.predictions is an empty list on a freshly built
    article, i.e. when no prediction has been run.
    """
    article_text = """
        MAPK seems to interact with chloroacetate esterase.
        However, MAPK is a better target for peroxydase.
        The thing is, Schmidtea mediterranea is a good model organism because reasons.
        However, cryoglobulin is better.
    """
    fresh_article = core.Article(pmid="1234", fulltext=article_text)
    assert fresh_article.predictions == []
Exemple #11
0
def test_only_dictionary():
    '''
    Checks get_candidates(only_dict=True): across the two sentences of the
    text, exactly one candidate in total is expected.
    '''
    art = core.Article(pmid="1234", fulltext="ERK1 interacts with MAPK4. MPPK2 interacts with MAPK4.")
    art.extract_sentences()
    per_sentence = []
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates(only_dict=True)
        per_sentence.append(len(sent.candidates))
    assert sum(per_sentence) == 1
Exemple #12
0
def test_total_tokens():
    '''
    Checks that dense feature 1 equals 12 for every candidate of the
    sentence (the original note calls this feature "total tokens").
    '''
    fulltext = "The protein MAPK interacts directly with cryoglobulin which is very interesting."
    art = core.Article(pmid="1234", fulltext=fulltext)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        for cand in sent.candidates:
            cand.compute_features()
            dense = cand.features_todense()
            assert dense[1] == 12
Exemple #13
0
def get_pos_count():
    '''
    Checks that dense feature 85 equals 1 for every candidate of the
    sentence (the original note calls this the "pos count" feature).
    '''
    # NOTE(review): unlike its siblings this function has no "test_" prefix,
    # so default pytest discovery would presumably skip it. Confirm whether
    # that is intentional before renaming.
    fulltext = "The protein MAPK14 seems to interact with MAPK12."
    art = core.Article(pmid="1234", fulltext=fulltext)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        for cand in sent.candidates:
            cand.compute_features()
            dense = cand.features_todense()
            assert dense[85] == 1
Exemple #14
0
def test_predictions_method_filled_content():
    """
    Checks that, once extract_interactions() has run, the first element of
    Article.predictions is an InteractionCandidate.
    """
    article_text = """
        MAPK seems to interact with chloroacetate esterase.
        However, MAPK is a better target for peroxydase.
        The thing is, Schmidtea mediterranea is a good model organism because reasons.
        However, cryoglobulin is better.
    """
    art = core.Article(pmid="1234", fulltext=article_text)
    art.extract_interactions()
    first_prediction = art.predictions[0]
    assert isinstance(first_prediction, core.InteractionCandidate)
Exemple #15
0
def test_sentence_to_html():
    '''
    Checks the HTML markup produced by Sentence.to_html(): proteins and
    verbs are expected to be wrapped in "prot" / "verb" spans.
    '''
    expected_html = 'The transcription factor of <span class="prot"> THOC2 </span> <span class="verb">seems</span> to <span class="verb">be</span> <span class="verb">interacting</span> with the <span class="prot"> nuclear receptor protein 2 </span> .'
    fulltext = "The transcription factor of THOC2 seems to be interacting with the nuclear receptor protein 2."
    art = core.Article(pmid="1234", fulltext=fulltext)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        assert sent.to_html() == expected_html
Exemple #16
0
def test_report_html():
    '''
    Builds a ReportSummary from two articles with predicted interactions
    and calls make_report("kktest"). There are no assertions: the test
    only requires that the call completes without raising.
    '''
    first_journal = "PLOS ONE"
    second_journal = "BMC GENOMICS"
    first_text = """
    Sak binds to p53 , and studies are underway to provide a molecular context for the Sak-p53 interaction.
    By coimmunoprecipitation coupled with mass spectrometry, we demonstrate that AHNAK interacts with dysferlin.
    """
    second_text = """
    Here we show that KLF4 physically interacts with STAT3 upon cytokine-induced phosphorylation of tyrosine 705 ( Y705 ) on STAT3.
    In this study , we report the Grb7 protein interacts with Filamin-a , an actin-crosslinking component of the cell cytoskeleton.
    """

    articles = [
        core.Article(pmid="1234", fulltext=first_text, journal=first_journal, year="2009"),
        core.Article(pmid="4321", fulltext=second_text, journal=second_journal, year="2016"),
    ]
    for art in articles:
        art.predict_interactions()

    report.ReportSummary(articles).make_report("kktest")
Exemple #17
0
def test_keyword_count():
    '''
    Checks two keyword-count positions of the dense feature vector of the
    first candidate: index 150 must be 2 and index 116 must be 1.
    '''
    art = core.Article(pmid="1234", fulltext="PROT12 interacts interacts and acetylates PROT1.")
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        first = sent.candidates[0]
        first.compute_features()
        # Per the original note: Interact == 2 and Acetylate == 1
        assert first.features_todense()[150] == 2
        assert first.features_todense()[116] == 1
Exemple #18
0
def test_sentence_separator():
    '''
    Checks sentence splitting on a text full of abbreviations, decimals
    and figure references: exactly 8 sentences are expected.
    '''
    tricky_text = """
        To identify roles of Hh signaling in the planarian CNS maintenance, we examined gene expression changes using RNA sequencing of cephalic ganglia following RNAi of hh, ptc, or a control gene (C. elegans unc-22) not present in the planarian genome.
        We developed a dissection technique that allowed cephalic ganglia tissue to be collected from large (>2 cm) S2F1L3F2 sexual strain S. mediterranea animals following a brief acid-based fixation (Figure 1C).
        To test for enrichment using this dissection technique, amputated head fragments collected from CIW4 asexual strain S. mediterranea animals after six control dsRNA feedings were used as a reference library (Figure 1D).
        Head fragments contain cephalic ganglia as well as most major planarian tissue types (Hyman, 1951).
        The magic number is 12.45 for the species S. mediterranea.
        Figure 2.a and 3.B Is the most important.
        S. mediterranea and C. elegans.
        But not (S.mediterranea)
    """
    art = core.Article(pmid="1234", fulltext=tricky_text)
    art.extract_sentences()
    sentence_total = len(art.sentences)
    assert sentence_total == 8
Exemple #19
0
def test_candidate_tohtml():
    '''
    Calls to_html() on the first candidate of each sentence after its
    features were computed and it was predicted. There are no assertions:
    the test only requires that the conversion does not raise.
    '''
    article_text = """
        MAPK seems to interact with chloroacetate esterase.
        However, MAPK is a better target for peroxydase.
        Chloroacetate esterase domain 3 interacts with MAPK4.
    """
    art = core.Article(pmid="1234", fulltext=article_text)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        first = sent.candidates[0]
        first.compute_features()
        first.predict()
        first.to_html()
Exemple #20
0
def test_verb_features():
    '''
    Checks dense features 2 to 13 (the verb-related slice, per the
    original notes) for every candidate of the sentence.
    '''
    # Original author's note: expects one VBG (interacting) and one VBZ (is)
    # verb to be detected, plus the verb scores ("for now 3 and 5").
    expected_slice = [0, 0, 2, 0, 0, 1, 4, 7, 1, 4, 3, 6]
    fulltext = "The protein MAPK is interacting and activating directly with cryoglobulin which is very interesting."
    art = core.Article(pmid="1234", fulltext=fulltext)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        for cand in sent.candidates:
            cand.compute_features()
            assert cand.features_todense()[2:14] == expected_slice
Exemple #21
0
def home_form():
    '''
    Handles the home form: optionally starts an analysis job and renders
    the matching template.

    Two kinds of submission are recognised:

    * an uploaded ``file``: its content becomes the full text of a single
      placeholder article (pmid "NA") attached to a "PLAIN-TEXT" query;
    * an ``identifiers`` form field: the identifiers are split on commas
      and line breaks, trimmed, and queried against the submitted
      ``database``, using abstracts for "PUBMED" and full text otherwise.

    In both cases the ``email`` form field is read, an ExportingThread is
    started and "progress.html" is rendered. With neither, "base.html" is
    rendered and no job is started.

    Returns:
        Whatever render_template() returns for the chosen template, which
        receives the identifier list and the response dict.
    '''
    identifiers = str()
    template = "base.html"
    response = {
        'search': False,
        'URL_BASE': os.environ.get('URL_BASE', ''),
        'CITATION': CITATION,
        'CITATION_SHORT': CITATION_SHORT,
    }
    if 'file' in request.files:
        template = "progress.html"
        email = request.form['email']
        fcontent = request.files['file'].read()
        # Job id: current Unix time in seconds followed by a random integer.
        # NOTE(review): this branch draws from 0-1000 while the identifiers
        # branch draws from 0-100. Confirm whether the difference is intended.
        job_id = int(str(int(time.time())) + str(random.randint(0, 1000)))
        response['job_id'] = job_id
        query = core.PMQuery(ids=[], database="PLAIN-TEXT")
        # The uploaded text is wrapped in a single placeholder article
        query.articles = [
            core.Article(pmid="NA", fulltext=fcontent, journal="NA", year=1)
        ]
        thread = ExportingThread(job_id, query, "fulltext", plain=True, email=email)
        thread.start()
    elif 'identifiers' in request.form:
        email = request.form['email']
        response['search'] = True
        template = "progress.html"
        job_id = int(str(int(time.time())) + str(random.randint(0, 100)))
        response['job_id'] = job_id
        raw_identifiers = request.form['identifiers']
        database = request.form['database']
        # Ids typed as "1, 2" or one per line with stray blanks would
        # otherwise keep their surrounding whitespace (or survive as
        # whitespace-only entries): trim every piece and drop empty ones.
        identifiers = [
            ident.strip()
            for ident in re.split(r"[,\n\r]", raw_identifiers)
            if ident.strip()
        ]
        source = "abstract" if database == "PUBMED" else "fulltext"
        query = core.PMQuery(ids=identifiers, database=database)
        thread = ExportingThread(job_id, query, source, email=email)
        thread.start()
        # Per the original author's note: the progress template polls the
        # job status asynchronously and redirects to the results page once
        # the job has finished.
    return render_template(template, identifiers=identifiers, response=response)
Exemple #22
0
def test_summary_intcount():
    '''
    Checks the 'left' entry of MAPK's int_count in the protein summary
    table: it must be 2 for this four-sentence article.
    '''
    article_text = """
             MAPK seems to interact with chloroacetate esterase.
             However, MAPK is a better target for peroxydase.
             The thing is, Schmidtea mediterranea is a good model organism because reasons.
             However, cryoglobulin is better.
         """
    art = core.Article(pmid="1234", fulltext=article_text)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        for cand in sent.candidates:
            cand.predict()

    prot_summary = report.ReportSummary([art]).protsummary
    prot_summary.makesummary()
    mapk_int_count = prot_summary.prot_table['MAPK']['int_count']
    assert mapk_int_count['left'] == 2
Exemple #23
0
def test_article_annotation():
    '''
    Checks sentence annotation: the words of all tokens whose 'ner' tag is
    "P", taken in order, must match the expected protein words.
    '''
    expected_words = "MAPK,chloroacetate,esterase,MAPK,peroxydase,cryoglobulin"
    article_text = """
        MAPK seems to interact with chloroacetate esterase.
        However, MAPK is a better target for peroxydase.
        The thing is, Schmidtea mediterranea is a good model organism because reasons.
        However, cryoglobulin is better.
    """
    art = core.Article(pmid="1234", fulltext=article_text)
    art.extract_sentences()
    tagged_words = []
    for sent in art.sentences:
        sent.annotate()
        tagged_words.extend(
            tok['word'] for tok in sent.tokens if tok['ner'] == "P"
        )
    assert ",".join(tagged_words) == expected_words
Exemple #24
0
def test_interaction_list():
    '''
    Checks the interaction list built by GraphSummary.makesummary():
    4 interactions in total, 2 of them unique.
    '''
    article_text = """
             MAPK seems to interact with MAPK4.
             However, Mapk4 interacts directly with MAPK.
             CPP3 is a molecular target of Akt3.
             AKT3 is also known to interact with CPP3.
         """
    art = core.Article(pmid="1234", fulltext=article_text)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        for cand in sent.candidates:
            cand.predict()

    graph = report.ReportSummary([art]).graphsummary
    graph.makesummary()
    assert len(graph.interactions) == 4
    assert graph.uniqinteractions_count == 2
Exemple #25
0
def test_summary_prottable_tohtml():
    '''
    Checks the HTML rendering of the protein summary table, sorted by
    "int_count", against a reference table.
    '''
    expected_table = """<table id="prottable">
<thead>
<tr>
<th>Protein</th>
<th>Total count</th>
<th>Int. count</th>
<th>Left count</th>
<th>Right count</th>
</tr>
</thead>
<tbody>
<tr>
<td><a href="http://www.uniprot.org/uniprot/?query=MAPK&sort=score" target="_blank">MAPK</a></td>
<td>2</td>
<td>2</td>
<td>2</td>
<td>0</td>
</tr>
<tr>
<td><a href="http://www.uniprot.org/uniprot/?query=CHLOROACETATE ESTERASE&sort=score" target="_blank">CHLOROACETATE ESTERASE</a></td>
<td>1</td>
<td>1</td>
<td>0</td>
<td>1</td>
</tr>
<tr>
<td><a href="http://www.uniprot.org/uniprot/?query=PEROXYDASE&sort=score" target="_blank">PEROXYDASE</a></td>
<td>1</td>
<td>1</td>
<td>0</td>
<td>1</td>
</tr>
<tr>
<td><a href="http://www.uniprot.org/uniprot/?query=CRYOGLOBULIN&sort=score" target="_blank">CRYOGLOBULIN</a></td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
</tr>
</tbody>
</table>"""
    article_text = """
             MAPK seems to interact with chloroacetate esterase.
             However, MAPK is a better target for peroxydase.
             The thing is, Schmidtea mediterranea is a good model organism because reasons.
             However, cryoglobulin is better.
         """
    art = core.Article(pmid="1234", fulltext=article_text, year=2015)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        for cand in sent.candidates:
            cand.predict()

    prot_summary = report.ReportSummary([art]).protsummary
    prot_summary.makesummary()
    rendered_table = prot_summary.table_to_html(sorted_by="int_count")
    assert rendered_table == expected_table
Exemple #26
0
def test_interaction_table_html():
    '''
    Checks the HTML rendering of the interactions table produced by
    GraphSummary.table_to_html() against a reference table.
    '''
    article_text = """
             MAPK seems to interact with MAPK4.
             However, Mapk4 interacts directly with MAPK.
             CPP3 is a molecular target of Akt3.
             AKT3 is also known to interact with CPP3.
         """
    expected_table = """<table id="inttable">
<thead>
<tr>
<th>Confidence</th>
<th>Protein (A)</th>
<th>Protein (B)</th>
<th>Off.symbol (A)</th>
<th>Off.symbol (B)</th>
<th>PMid</th>
<th>Year</th>
<th>Sentence</th>
</tr>
</thead>
<tbody>
<tr>
<td>0.844</td>
<td>Mapk4</td>
<td>MAPK</td>
<td><a href="http://www.uniprot.org/uniprot/?query=MAPK4&sort=score" target="_blank">MAPK4</a></td>
<td><a href="http://www.uniprot.org/uniprot/?query=MAPK&sort=score" target="_blank">MAPK</a></td>
<td><a href="https://www.ncbi.nlm.nih.gov/pubmed/?term=1234" target="_blank">1234</a></td>
<td>2017</td>
<td>However , <span class="prot"> Mapk4 </span> <span class="verb">interacts</span> directly with <span class="prot"> MAPK </span> .</td>
</tr>
<tr>
<td>0.796</td>
<td>CPP3</td>
<td>Akt3</td>
<td><a href="http://www.uniprot.org/uniprot/?query=CPP3&sort=score" target="_blank">CPP3</a></td>
<td><a href="http://www.uniprot.org/uniprot/?query=AKT3&sort=score" target="_blank">AKT3</a></td>
<td><a href="https://www.ncbi.nlm.nih.gov/pubmed/?term=1234" target="_blank">1234</a></td>
<td>2017</td>
<td><span class="prot"> CPP3 </span> <span class="verb">is</span> a molecular target of <span class="prot"> Akt3 </span> .</td>
</tr>
<tr>
<td>0.744</td>
<td>MAPK</td>
<td>MAPK4</td>
<td><a href="http://www.uniprot.org/uniprot/?query=MAPK&sort=score" target="_blank">MAPK</a></td>
<td><a href="http://www.uniprot.org/uniprot/?query=MAPK4&sort=score" target="_blank">MAPK4</a></td>
<td><a href="https://www.ncbi.nlm.nih.gov/pubmed/?term=1234" target="_blank">1234</a></td>
<td>2017</td>
<td><span class="prot"> MAPK </span> <span class="verb">seems</span> to <span class="verb">interact</span> with <span class="prot"> MAPK4 </span> .</td>
</tr>
<tr>
<td>0.714</td>
<td>AKT3</td>
<td>CPP3</td>
<td><a href="http://www.uniprot.org/uniprot/?query=AKT3&sort=score" target="_blank">AKT3</a></td>
<td><a href="http://www.uniprot.org/uniprot/?query=CPP3&sort=score" target="_blank">CPP3</a></td>
<td><a href="https://www.ncbi.nlm.nih.gov/pubmed/?term=1234" target="_blank">1234</a></td>
<td>2017</td>
<td><span class="prot"> AKT3 </span> <span class="verb">is</span> also <span class="verb">known</span> to <span class="verb">interact</span> with <span class="prot"> CPP3 </span> .</td>
</tr>
</tbody>
</table>"""
    art = core.Article(pmid="1234", fulltext=article_text, year=2017)
    art.extract_sentences()
    for sent in art.sentences:
        sent.annotate()
        sent.get_candidates()
        for cand in sent.candidates:
            cand.predict()

    graph = report.ReportSummary([art]).graphsummary
    graph.makesummary()
    rendered_table = graph.table_to_html()
    assert rendered_table == expected_table