예제 #1
0
def biomarkerMediumRelations(filename):
    """Build the biomarker/medium ``Relations`` object for one document.

    The document is parsed with ``doc_parser.parseDoc`` on every call.
    NOTE: an earlier pickle-based cache of the parsed sentences
    (``cache/<filename>/sentences.pkl``) was disabled; the unused cache
    path and the dead code around it have been removed.

    Args:
        filename: Passed unchanged to ``doc_parser.parseDoc``.

    Returns:
        The ``Relations`` instance built from the parsed sentences, the
        biomarker extractor and the medium extractor.
    """
    sentences = doc_parser.parseDoc(filename)

    # Candidate spaces: n_max=1 for biomarkers, n_max=3 for media.
    biomarker_ngrams = Ngrams(n_max=1)
    medium_ngrams = Ngrams(n_max=3)

    # The matchers themselves are defined in the separate `matchers` module.
    biomarker_matcher = matchers.getBiomarkerMatcher()
    medium_matcher = matchers.getMediumMatcher()

    biomarker_extractor = CandidateExtractor(biomarker_ngrams,
                                             biomarker_matcher)
    medium_extractor = CandidateExtractor(medium_ngrams, medium_matcher)

    # Combine both extractors over the parsed sentences.
    return Relations(sentences, biomarker_extractor, medium_extractor)
예제 #2
0
def biomarkerDrugRelations(filename):
    """Build a ``Relations`` object from biomarker and drug-association extractors.

    NOTE: caching of the parsed sentences is disabled in this function, so
    ``doc_parser.parseDoc`` runs on every call.

    Args:
        filename: Passed unchanged to ``doc_parser.parseDoc``.

    Returns:
        The ``Relations`` instance built from the parsed sentences and the
        two candidate extractors.
    """
    parsed_sentences = doc_parser.parseDoc(filename)

    # Candidate spaces: n_max=1 for biomarkers, n_max=5 for drug associations.
    bm_space = Ngrams(n_max=1)
    dam_space = Ngrams(n_max=5)

    bm_matcher = matchers.getBiomarkerMatcher()
    dam_matcher = matchers.getDrugAssociationMatcher()

    bm_extractor = CandidateExtractor(bm_space, bm_matcher)
    dam_extractor = CandidateExtractor(dam_space, dam_matcher)

    return Relations(parsed_sentences, bm_extractor, dam_extractor)
예제 #3
0
def getBiomarkerTestsetRelations(sentences, session):
    """Build a ``Relations`` object from biomarker and test-set extractors.

    Args:
        sentences: Forwarded as the first argument to ``Relations``.
        session: Accepted but not used by this function.

    Returns:
        The ``Relations`` instance built from ``sentences`` and the two
        candidate extractors.
    """
    # Candidate spaces: n_max=1 for biomarkers, n_max=10 for test sets.
    bm_space = Ngrams(n_max=1)
    ts_space = Ngrams(n_max=10)

    bm_matcher = matchers.getBiomarkerMatcher()
    ts_matcher = matchers.getTestSetMatcher()

    # Both extractors are created with `Disease` as their first argument.
    bm_extractor = CandidateExtractor(Disease, bm_space, bm_matcher)
    ts_extractor = CandidateExtractor(Disease, ts_space, ts_matcher)

    return Relations(sentences, bm_extractor, ts_extractor)
예제 #4
0
def getBiomarkerLevelsRelations(sentences, session):
    """Build a ``Relations`` object from biomarker and levels extractors.

    Args:
        sentences: Forwarded as the first argument to ``Relations``.
        session: Accepted but not used by this function.

    Returns:
        The ``Relations`` instance built from ``sentences`` and the two
        candidate extractors.
    """
    # Candidate spaces: n_max=1 for biomarkers, n_max=15 for levels.
    bm_space = Ngrams(n_max=1)
    levels_space = Ngrams(n_max=15)

    bm_matcher = matchers.getBiomarkerMatcher()
    levels_matcher = matchers.getLevelsMatcher()

    # Both extractors are created with `Disease` as their first argument.
    bm_extractor = CandidateExtractor(Disease, bm_space, bm_matcher)
    levels_extractor = CandidateExtractor(Disease, levels_space,
                                          levels_matcher)

    return Relations(sentences, bm_extractor, levels_extractor)
예제 #5
0
def getBiomarkerUnitsRelations(sentences, session):
    """Build a ``Relations`` object from biomarker and units extractors.

    Args:
        sentences: Forwarded as the first argument to ``Relations``.
        session: Accepted but not used by this function.

    Returns:
        The ``Relations`` instance built from ``sentences`` and the two
        candidate extractors.
    """
    # Candidate spaces: n_max=1 for biomarkers, n_max=10 for units.
    biomarker_ngrams = Ngrams(n_max=1)
    unit_ngrams = Ngrams(n_max=10)

    # Retrieve matchers.
    biomarker_matcher = matchers.getBiomarkerMatcher()
    units_matcher = matchers.getUnitsMatcher()

    # Both extractors are created with `Disease` as their first argument.
    biomarker_extractor = CandidateExtractor(Disease, biomarker_ngrams,
                                             biomarker_matcher)
    units_extractor = CandidateExtractor(Disease, unit_ngrams, units_matcher)

    # Fixed: this previously called `Relation(...)` (singular), unlike every
    # parallel *Relations builder, which constructs `Relations(...)` from the
    # same (sentences, extractor, extractor) arguments.
    return Relations(sentences, biomarker_extractor, units_extractor)
예제 #6
0
def biomarkerMeasurementRelations(sentences):
    """Build a ``Relations`` object from biomarker and measurement-type extractors.

    Args:
        sentences: Forwarded as the first argument to ``Relations``.

    Returns:
        The ``Relations`` instance built from ``sentences`` and the two
        candidate extractors.
    """
    # Candidate spaces: n_max=1 for biomarkers, n_max=5 for measurement types.
    bm_space = Ngrams(n_max=1)
    mt_space = Ngrams(n_max=5)

    bm_matcher = matchers.getBiomarkerMatcher()
    mt_matcher = matchers.getMeasurementTypeMatcher()

    bm_extractor = CandidateExtractor(bm_space, bm_matcher)
    mt_extractor = CandidateExtractor(mt_space, mt_matcher)

    return Relations(sentences, bm_extractor, mt_extractor)
예제 #7
0
def getBiomarkerDrugRelations(filename, session):
    """Extract biomarker/drug-association candidates from one document.

    Args:
        filename: Passed to ``doc_parser.parseDoc`` together with ``session``.
        session: Passed to ``doc_parser.parseDoc`` and to the extractor's
            ``extract`` call.

    Returns:
        A 3-tuple ``(candidates, sentences, Disease)``: the result of
        ``extract``, the parsed sentences, and the candidate class that was
        handed to the extractor.
    """
    # Processing the input data and converting to sentences
    sentences = doc_parser.parseDoc(filename, session)

    # Candidate spaces: n_max=1 for biomarkers, n_max=5 for drug associations.
    biomarker_ngrams = Ngrams(n_max=1)
    drug_association_ngrams = Ngrams(n_max=5)

    biomarker_matcher = matchers.getBiomarkerMatcher()
    drug_association_matcher = matchers.getDrugAssociationMatcher()

    # A single extractor takes both candidate spaces and both matchers.
    extractor = CandidateExtractor(
        Disease,
        [biomarker_ngrams, drug_association_ngrams],
        [biomarker_matcher, drug_association_matcher])

    # possible pairs/relations (candidates)
    candidates = extractor.extract(
        sentences, 'BDA Development Candidates', session)

    # Single-argument parenthesised form: emits the same text as the old
    # `print a, b` statement on Python 2 and is also valid on Python 3.
    print("Number of candidates: %d" % len(candidates))

    return candidates, sentences, Disease
예제 #8
0
def getBiomarkerMediumRelations(filename, session):
    """Build a ``Relations`` object from biomarker and medium extractors.

    Args:
        filename: Passed to ``doc_parser.parseDoc`` together with ``session``.
        session: Only forwarded to ``doc_parser.parseDoc``.

    Returns:
        The ``Relations`` instance built from the parsed sentences and the
        two candidate extractors.
    """
    parsed_sentences = doc_parser.parseDoc(filename, session)

    # Candidate spaces: n_max=1 for biomarkers, n_max=3 for media.
    bm_space = Ngrams(n_max=1)
    medium_space = Ngrams(n_max=3)

    # The matchers themselves live in the separate `matchers` module.
    bm_matcher = matchers.getBiomarkerMatcher()
    medium_matcher = matchers.getMediumMatcher()

    # Both extractors are created with `Disease` as their first argument.
    bm_extractor = CandidateExtractor(Disease, bm_space, bm_matcher)
    medium_extractor = CandidateExtractor(Disease, medium_space,
                                          medium_matcher)

    return Relations(parsed_sentences, bm_extractor, medium_extractor)
예제 #9
0
def getBiomarkerTypeRelations(filename, session):
    """Build a ``Relations`` object from biomarker and biomarker-type extractors.

    Args:
        filename: Passed to ``doc_parser.parseDoc`` together with ``session``.
        session: Only forwarded to ``doc_parser.parseDoc``.

    Returns:
        The ``Relations`` instance built from the parsed sentences and the
        two candidate extractors.
    """
    parsed_sentences = doc_parser.parseDoc(filename, session)

    # Candidate spaces: n_max=1 for biomarkers, n_max=2 for biomarker types.
    bm_space = Ngrams(n_max=1)
    type_space = Ngrams(n_max=2)

    bm_matcher = matchers.getBiomarkerMatcher()
    type_matcher = matchers.getBiomarkerTypeMatcher()

    # Both extractors are created with `Disease` as their first argument.
    bm_extractor = CandidateExtractor(Disease, bm_space, bm_matcher)
    type_extractor = CandidateExtractor(Disease, type_space, type_matcher)

    return Relations(parsed_sentences, bm_extractor, type_extractor)
예제 #10
0
def getBiomarkerDiseaseRelations(session):
    """Extract biomarker/disease candidates from the 'Training' corpus.

    The corpus named ``'Training'`` is loaded through ``session``, all of its
    documents' sentences are collected, candidates are extracted under the
    name ``'BD Training Candidates'``, and the result is added to the session
    and committed.

    Args:
        session: Object used to query ``Corpus``, passed to ``extract``, and
            on which ``add``/``commit`` are called.

    Returns:
        A 4-tuple ``(candidates, sentences, Disease, session)``.

    Raises:
        Whatever ``.one()`` raises when the query does not match exactly one
        corpus (presumably SQLAlchemy's NoResultFound / MultipleResultsFound
        -- TODO confirm the session type).
    """
    corpus = session.query(Corpus).filter(Corpus.name == 'Training').one()

    # Gather every sentence of every document into one de-duplicated set.
    sentences = {
        sentence
        for document in corpus
        for sentence in document.sentences
    }

    # Candidate spaces: n_max=1 for biomarkers, n_max=5 for diseases.
    biomarker_ngrams = Ngrams(n_max=1)
    disease_ngrams = Ngrams(n_max=5)

    # The matchers themselves live in the separate `matchers` module.
    biomarker_matcher = matchers.getBiomarkerMatcher()
    disease_matcher = matchers.getDiseaseMatcher()

    # Combine candidate class, child context spaces, and matchers into the
    # extractor.
    extractor = CandidateExtractor(
        Disease,
        [biomarker_ngrams, disease_ngrams],
        [biomarker_matcher, disease_matcher])

    # Run the extractor to retrieve the candidate set / relations.
    candidates = extractor.extract(
        sentences, 'BD Training Candidates', session)

    # Single-argument parenthesised form: emits the same text as the old
    # `print a, b` statement on Python 2 and is also valid on Python 3.
    print("Number of candidates: %d" % len(candidates))

    session.add(candidates)
    session.commit()

    # TODO: repeat the extraction for the 'BD Development' and 'BD Test'
    # corpora (naming each set corpus_name + ' Candidates') and commit them.

    return candidates, sentences, Disease, session
예제 #11
0
def biomarkerTypeRelations(filename):
    """Build a ``Relations`` object from biomarker and biomarker-type extractors.

    NOTE: the pickle-based cache of parsed sentences
    (``cache/<filename>/sentences.pkl``) is disabled in this function, so
    ``doc_parser.parseDoc`` runs on every call.

    Args:
        filename: Passed unchanged to ``doc_parser.parseDoc``.

    Returns:
        The ``Relations`` instance built from the parsed sentences and the
        two candidate extractors.
    """
    parsed_sentences = doc_parser.parseDoc(filename)

    # Candidate spaces: n_max=1 for biomarkers, n_max=2 for biomarker types.
    bm_space = Ngrams(n_max=1)
    type_space = Ngrams(n_max=2)

    bm_matcher = matchers.getBiomarkerMatcher()
    type_matcher = matchers.getBiomarkerTypeMatcher()

    bm_extractor = CandidateExtractor(bm_space, bm_matcher)
    type_extractor = CandidateExtractor(type_space, type_matcher)

    return Relations(parsed_sentences, bm_extractor, type_extractor)