예제 #1
0
def biomarkerMediumRelations(filename):
    """Extract candidate (biomarker, medium) relation pairs from a document.

    Parses *filename* into sentences, then pairs single-token biomarker
    candidates with medium candidates of up to three tokens that co-occur.

    :param filename: path of the document to parse.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Parse the input document into sentences.  (A pickle cache under
    # "cache/<filename>/sentences.pkl" used to short-circuit this step;
    # re-enable caching here if parsing becomes a bottleneck.)
    sentences = doc_parser.parseDoc(filename)

    # Biomarker mentions are unigrams; media may span up to 3 tokens.
    biomarker_ngrams = Ngrams(n_max=1)
    medium_ngrams = Ngrams(n_max=3)

    # The two matchers are defined in the shared `matchers` module.
    BM = matchers.getBiomarkerMatcher()
    MM = matchers.getMediumMatcher()

    CandidateExtractor_BM = CandidateExtractor(biomarker_ngrams, BM)
    CandidateExtractor_MM = CandidateExtractor(medium_ngrams, MM)

    # Pair up co-occurring biomarker/medium candidates.
    possiblePairs = Relations(sentences, CandidateExtractor_BM,
                              CandidateExtractor_MM)
    return possiblePairs
예제 #2
0
def biomarkerDrugRelations(filename):
    """Extract candidate (biomarker, drug association) pairs from a document.

    Parses *filename* into sentences, then pairs single-token biomarker
    candidates with drug-association candidates of up to five tokens.

    :param filename: path of the document to parse.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Parse the input document into sentences.  (Sentence caching via
    # pickle was removed; re-add it here if re-parsing proves costly.)
    sentences = doc_parser.parseDoc(filename)

    # Biomarkers are unigrams; drug-association phrases may be longer.
    biomarker_ngrams = Ngrams(n_max=1)
    drug_association_ngrams = Ngrams(n_max=5)

    BM = matchers.getBiomarkerMatcher()
    DAM = matchers.getDrugAssociationMatcher()

    CandidateExtractor_BM = CandidateExtractor(biomarker_ngrams, BM)
    CandidateExtractor_DAM = CandidateExtractor(drug_association_ngrams, DAM)

    possiblePairs = Relations(sentences, CandidateExtractor_BM,
                              CandidateExtractor_DAM)

    return possiblePairs
def generateRelations(sentences):
    """Build candidate (biomarker, units) relation pairs from parsed sentences.

    :param sentences: iterable of already-parsed sentence objects.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Fetch the matchers from their dedicated modules.
    biomarker_matcher = BiomarkerMatcher.getBiomarkerMatcher()
    units_matcher = UnitsMatcher.getUnitsMatcher()

    # Biomarkers are single tokens; unit expressions can run to 10 tokens.
    CandidateExtractor_BM = CandidateExtractor(Ngrams(n_max=1),
                                               biomarker_matcher)
    CandidateExtractor_U = CandidateExtractor(Ngrams(n_max=10),
                                              units_matcher)

    # Pair co-occurring candidates and hand the relations back.
    return Relations(sentences, CandidateExtractor_BM, CandidateExtractor_U)
예제 #4
0
def biomarkerTestsetRelations(sentences):
    """Build candidate (biomarker, test set) relation pairs from sentences.

    :param sentences: iterable of already-parsed sentence objects.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Matchers come from the shared `matchers` module.
    biomarker_matcher = matchers.getBiomarkerMatcher()
    test_set_matcher = matchers.getTestSetMatcher()

    # Biomarkers are unigrams; test-set mentions may span 10 tokens.
    CandidateExtractor_BM = CandidateExtractor(Ngrams(n_max=1),
                                               biomarker_matcher)
    CandidateExtractor_TS = CandidateExtractor(Ngrams(n_max=10),
                                               test_set_matcher)

    # Pair co-occurring candidates.
    return Relations(sentences, CandidateExtractor_BM, CandidateExtractor_TS)
예제 #5
0
def biomarkerMeasurementRelations(sentences):
    """Build candidate (biomarker, measurement type) pairs from sentences.

    :param sentences: iterable of already-parsed sentence objects.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Matchers come from the shared `matchers` module.
    biomarker_matcher = matchers.getBiomarkerMatcher()
    measurement_matcher = matchers.getMeasurementTypeMatcher()

    # Biomarkers are unigrams; measurement types may span 5 tokens.
    CandidateExtractor_BM = CandidateExtractor(Ngrams(n_max=1),
                                               biomarker_matcher)
    CandidateExtractor_MT = CandidateExtractor(Ngrams(n_max=5),
                                               measurement_matcher)

    # Pair co-occurring candidates.
    return Relations(sentences, CandidateExtractor_BM, CandidateExtractor_MT)
예제 #6
0
def biomarkerLevelsRelations(sentences):
    """Build candidate (biomarker, levels) relation pairs from sentences.

    :param sentences: iterable of already-parsed sentence objects.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Matchers come from the shared `matchers` module.
    biomarker_matcher = matchers.getBiomarkerMatcher()
    levels_matcher = matchers.getLevelsMatcher()

    # Biomarkers are unigrams; level expressions can run to 15 tokens.
    CandidateExtractor_BM = CandidateExtractor(Ngrams(n_max=1),
                                               biomarker_matcher)
    CandidateExtractor_L = CandidateExtractor(Ngrams(n_max=15),
                                              levels_matcher)

    # Pair co-occurring candidates.
    return Relations(sentences, CandidateExtractor_BM, CandidateExtractor_L)
예제 #7
0
def biomarkerTypeRelations(filename):
    """Extract candidate (biomarker, biomarker type) pairs from a document.

    Parses *filename* into sentences, then pairs single-token biomarker
    candidates with biomarker-type candidates of up to two tokens.

    :param filename: path of the document to parse.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Parse the input document into sentences.  (A pickle cache under
    # "cache/<filename>/sentences.pkl" used to short-circuit this step;
    # re-enable caching here if parsing becomes a bottleneck.)
    sentences = doc_parser.parseDoc(filename)

    # Biomarkers are unigrams; type labels may span up to 2 tokens.
    biomarker_ngrams = Ngrams(n_max=1)
    biomarker_type_ngrams = Ngrams(n_max=2)

    BM = matchers.getBiomarkerMatcher()
    TM = matchers.getBiomarkerTypeMatcher()

    CandidateExtractor_BM = CandidateExtractor(biomarker_ngrams, BM)
    CandidateExtractor_TM = CandidateExtractor(biomarker_type_ngrams, TM)

    possiblePairs = Relations(sentences, CandidateExtractor_BM,
                              CandidateExtractor_TM)

    return possiblePairs
def generateRelations(_filename):
    """Extract candidate (biomarker, disease) relation pairs from a document.

    Parses *_filename* into sentences, prints them for inspection, then
    pairs single-token biomarker candidates with disease candidates of up
    to five tokens.

    :param _filename: path of the document to parse.
    :returns: a ``Relations`` object holding the candidate pairs.
    """
    # Parse the input document into sentences.  (Earlier revisions cached
    # parsed sentences via pickle and built the corpus with TextDocParser/
    # SentenceParser; restore that here if re-parsing becomes costly.)
    sentences = doc_parser.parseDoc(_filename)
    # NOTE: was a Python-2 print statement (`print sentences`); the
    # parenthesized call below behaves the same and is valid in Python 3.
    print(sentences)

    # Biomarkers are unigrams; disease phrases may span up to 5 tokens.
    biomarker_ngrams = Ngrams(n_max=1)
    disease_ngrams = Ngrams(n_max=5)

    # Matchers live in their own modules for this pipeline.
    BM = BiomarkerMatcher.getBiomarkerMatcher()
    CandidateExtractor_BM = CandidateExtractor(biomarker_ngrams, BM)
    DM = DiseaseMatcher.getDiseaseMatcher()
    CandidateExtractor_DM = CandidateExtractor(disease_ngrams, DM)

    # TODO(review): the disease candidate generator mishandles coordinated
    # phrases like "ovarian and prostate cancer" — a dictionary-based
    # rewrite pass (DiseaseCandidateGenerator.addDiseaseBases) existed
    # previously and may need to be restored to fix this.

    # Pair co-occurring biomarker/disease candidates.
    possiblePairs = Relations(
        sentences, CandidateExtractor_BM, CandidateExtractor_DM)

    return possiblePairs