def explainRelevance(guid):
    """
        Build and save the "explain" HTML page for the document `guid`.

        The document is loaded through cp.Corpus.loadSciDoc, rendered with
        prettyPrintDocumentHTML, handed to computeOverlapForAllReferences
        together with its "_inverted_index" entry from `glob`, and the
        rendered page is then written to
        cp.Corpus.dir_output + guid + "_explain.html".
    """
    sci_doc = cp.Corpus.loadSciDoc(guid)

    # The page is rendered *before* the overlap computation is run; this
    # ordering is kept exactly as it was.
    page = sci_doc.prettyPrintDocumentHTML(
        True,
        True,
        False,
        tokenizeAndTag,
        referenceFormatting,
    )

    inverted_index = sci_doc.glob["_inverted_index"]
    computeOverlapForAllReferences(sci_doc, inverted_index)

    target_path = cp.Corpus.dir_output + guid + "_explain.html"
    writeFileText(page, target_path)
# Example #2
# 0
def explainRelevance(guid):
    """
        Produce the explainer HTML file for the document with this guid.

        Steps, in order: load the SciDoc, render it to HTML, run
        computeOverlapForAllReferences with the document's "_inverted_index"
        (taken from `document.glob`), then write the rendered HTML to
        "<cp.Corpus.dir_output><guid>_explain.html".
    """
    document = cp.Corpus.loadSciDoc(guid)

    # positional arguments passed straight through to the HTML renderer
    render_args = (True, True, False, tokenizeAndTag, referenceFormatting)
    markup = document.prettyPrintDocumentHTML(*render_args)

    computeOverlapForAllReferences(document, document.glob["_inverted_index"])

    writeFileText(
        markup,
        cp.Corpus.dir_output + guid + "_explain.html",
    )
# Example #3
# 0
    def generateVisualization(self, guid):
        """
            Build the visualization files for the document identified by `guid`.

            The document is loaded and passed through
            Corpus.tagAllReferencesAsInCollectionOrNot. For every reference that
            Corpus.matchReferenceInIndex can match, the tokens it shares with the
            document are stored in `self.overlapping_tokens` (keyed by the
            reference id) and the reference id is recorded under each shared
            token in `self.term_info`.

            Two files are written to the hard-coded output directory:
            "<guid>_data.json" (the overlap data as a "var token_data=...;"
            assignment) and "<guid>_vis.html" (the rendered, padded document).
        """
        # NOTE: the output location is hard-coded; using Corpus.dir_output
        # instead was commented out in the original.
        target_dir = r"C:\Users\dd\Documents\Dropbox\PhD\code\doc_visualization\\"

        document = Corpus.loadSciDoc(guid)
        Corpus.tagAllReferencesAsInCollectionOrNot(document)
        doc_tokens = self.getDocumentTokens(document)

        # give each unique term its own id plus an (initially empty) list of
        # the references it is shared with
        for position, term in enumerate(doc_tokens):
            self.term_info[term] = {"token_id": str(position), "references": []}

        self.overlapping_tokens = {}

        # The looked-up value is not used further down in this method; the
        # lookup itself is kept so behaviour stays unchanged.
        in_collection_references = Corpus.getMetadataByGUID(guid)["outlinks"]

        for reference in document["references"]:
            hit = Corpus.matchReferenceInIndex(reference)
            if not hit:
                continue

            cited_doc = Corpus.loadSciDoc(hit["guid"])
            cited_tokens = self.getDocumentTokens(cited_doc)

            # per matched reference id: the tokens it has in common with the
            # current document
            shared = self.getOverlappingTokens(doc_tokens, cited_tokens)
            ref_id = reference["id"]
            self.overlapping_tokens[ref_id] = shared

            for term in shared:
                seen_in = self.term_info[term]["references"]
                if ref_id not in seen_in:
                    seen_in.append(ref_id)

        # try to find some signal in the noise
        self.filterTokens()

        serialized = json.dumps(self.overlapping_tokens)
        writeFileText(
            "var token_data=" + serialized + ";",
            target_dir + guid + "_data.json",
        )

        # extra_attribute_function (self.extraAttributes) is deliberately not
        # passed; it was commented out in the original call.
        formatters = {
            "citation_formatting_function": self.citationFormatting,
            "reference_formatting_function": self.referenceFormatting,
            "text_formatting_function": self.textFormatting,
        }
        page = document.prettyPrintDocumentHTML(True, True, False, **formatters)
        page = self.padWithHTML(page, guid)
        writeFileText(page, target_dir + guid + "_vis.html")
    def generateVisualization(self, guid):
        """
            Generate the token-overlap data file and the HTML view for `guid`.

            Side effects visible in this method:
              - `self.term_info` gets one entry per token of the document
                (a "token_id" string and a "references" list);
              - `self.overlapping_tokens` is reset and then filled with the
                overlapping tokens of every reference matched in the index;
              - `self.filterTokens()` is run on the collected data;
              - "<guid>_data.json" and "<guid>_vis.html" are written to the
                hard-coded output directory.
        """
        # Hard-coded destination (the Corpus.dir_output alternative was
        # commented out in the original).
        out_prefix = r"C:\Users\dd\Documents\Dropbox\PhD\code\doc_visualization\\" + guid

        source_doc = Corpus.loadSciDoc(guid)
        Corpus.tagAllReferencesAsInCollectionOrNot(source_doc)
        source_counts = self.getDocumentTokens(source_doc)

        # one unique id per unique term, stored in the term dictionary
        for token_id, token in enumerate(source_counts):
            entry = {"token_id": str(token_id), "references": []}
            self.term_info[token] = entry

        self.overlapping_tokens = {}

        # Result is unused below; the call is preserved as-is.
        in_collection_references = Corpus.getMetadataByGUID(guid)["outlinks"]

        for ref in source_doc["references"]:
            matched = Corpus.matchReferenceInIndex(ref)
            if matched:
                target_counts = self.getDocumentTokens(
                    Corpus.loadSciDoc(matched["guid"])
                )

                # store, under the reference's id, the list of tokens it
                # overlaps on with the current document
                overlap = self.getOverlappingTokens(source_counts, target_counts)
                key = ref["id"]
                self.overlapping_tokens[key] = overlap

                for token in overlap:
                    references = self.term_info[token]["references"]
                    if key in references:
                        continue
                    references.append(key)

        # try to find some signal in the noise
        self.filterTokens()

        data_text = "var token_data=" + json.dumps(self.overlapping_tokens) + ";"
        writeFileText(data_text, out_prefix + "_data.json")

        # NOTE: no extra_attribute_function is supplied (disabled in the
        # original call).
        rendered = source_doc.prettyPrintDocumentHTML(
            True, True, False,
            citation_formatting_function=self.citationFormatting,
            reference_formatting_function=self.referenceFormatting,
            text_formatting_function=self.textFormatting,
        )
        writeFileText(self.padWithHTML(rendered, guid), out_prefix + "_vis.html")