Ejemplo n.º 1
0
def makeNpatterns(nPatternStringList):
    """Build one ``nPattern`` per group of raw pattern strings.

    Every string of every group goes through ``st.preprocess`` (only
    element ``[0]`` of its result is kept), each token of that element is
    stemmed with the module-level ``stemmer``, and the stems are re-joined
    with single spaces.

    Args:
        nPatternStringList: iterable of groups, each group being an
            iterable of pattern strings.

    Returns:
        A list with one ``nPattern`` per group, in input order. Each is
        constructed from the list of normalised strings of its group.
    """
    # Stage 1: tokenize every string, keeping the first preprocessed element.
    tokenized_groups = []
    for group in nPatternStringList:
        tokenized_groups.append([st.preprocess(raw)[0] for raw in group])

    # Stage 2: stem token by token.
    stemmed_groups = []
    for group in tokenized_groups:
        stemmed_groups.append(
            [[stemmer.stem(token) for token in tokens] for tokens in group]
        )

    # Stage 3: back to space-separated strings, then wrap each group.
    joined_groups = [
        [" ".join(tokens) for tokens in group] for group in stemmed_groups
    ]
    return [nPattern(texts) for texts in joined_groups]
Ejemplo n.º 2
0
def stemFile(fileLst, spSet=None):
	"""Tokenize and stem the first two fields of every paper.

	Args:
		fileLst: iterable of papers. ``paper[0]`` and ``paper[1]`` are each
			passed to ``st.preprocess`` (the original inline comment labels
			them title and abstract).
		spSet: forwarded unchanged as the second argument of
			``st.preprocess``. When omitted, a fresh empty dict is created
			for each call instead of sharing one default object.

	Returns:
		A list with one ``[title_stems, abstract_stems]`` entry per paper.
		``title_stems`` is a flat list of stems taken from element ``[0]``
		of the preprocessed title; ``abstract_stems`` holds one list of
		stems per element of the preprocessed abstract.

	Raises:
		Whatever ``st.preprocess`` or the indexing raises; the offending
		paper is printed before the exception propagates.
	"""
	if spSet is None:
		# Same empty-dict value as the old default, but not shared between calls.
		spSet = {}

	papers = []
	for paper in fileLst:
		try:
			papers.append((st.preprocess(paper[0], spSet), st.preprocess(paper[1], spSet)))
		except Exception:
			# Show which record failed, then let the error propagate.
			print(paper)
			raise

	# Stemming runs only after every paper has been preprocessed successfully.
	return [
		[
			[stemmer.stem(word) for word in title[0]],
			[[stemmer.stem(word) for word in sentence] for sentence in abstract],
		]
		for title, abstract in papers
	]
Ejemplo n.º 3
0
def stemFile(fileLst, spSet=None):
	"""Tokenize and stem the first two fields of every paper.

	Args:
		fileLst: iterable of papers. ``paper[0]`` and ``paper[1]`` are each
			passed to ``st.preprocess`` (the original inline comment labels
			them title and abstract).
		spSet: forwarded unchanged as the second argument of
			``st.preprocess``. When omitted, a fresh empty dict is created
			for each call instead of sharing one default object.

	Returns:
		A list with one ``[title_stems, abstract_stems]`` entry per paper.
		``title_stems`` is a flat list of stems taken from element ``[0]``
		of the preprocessed title; ``abstract_stems`` holds one list of
		stems per element of the preprocessed abstract.

	Raises:
		Whatever ``st.preprocess`` or the indexing raises; the offending
		paper is printed before the exception propagates.
	"""
	if spSet is None:
		# Same empty-dict value as the old default, but not shared between calls.
		spSet = {}

	papers = []
	for paper in fileLst:
		try:
			papers.append((st.preprocess(paper[0], spSet), st.preprocess(paper[1], spSet)))
		except Exception:
			# Show which record failed, then let the error propagate.
			print(paper)
			raise

	# Stemming runs only after every paper has been preprocessed successfully.
	return [
		[
			[stemmer.stem(word) for word in title[0]],
			[[stemmer.stem(word) for word in sentence] for sentence in abstract],
		]
		for title, abstract in papers
	]
Ejemplo n.º 4
0
def execute(abstract_list, cutoff):
    """Screen abstracts against the module's patterns and anti-patterns.

    For every abstract the text is lower-cased, species are looked up with
    ``getSpecies``, and every unordered pair of distinct species (stored as
    a sorted tuple) is used to initialize all patterns. Non-empty
    ``pCheck`` results of the patterns are collected as hits, those of the
    anti-patterns as misses, accumulated over all pairs of the abstract.

    Args:
        abstract_list: sized collection of abstract strings.
        cutoff: minimum number of hits an abstract needs to be kept.

    Returns:
        A list of ``(sentences, hit_count)`` tuples for abstracts with at
        least ``cutoff`` hits and no misses, where ``sentences`` is the
        list of space-joined strings built from ``st.preprocess``.
    """
    positive_patterns = makePatterns(patterns)
    positive_patterns += makeNpatterns(nPatterns)
    negative_patterns = makePatterns(antiPatterns)

    accepted = []
    total = len(abstract_list)
    # TODO: multiprocess this loop
    for position, raw_abstract in enumerate(abstract_list, start=1):
        print("Processing abstract {}/{}".format(position, total))
        print(raw_abstract)
        lowered = raw_abstract.lower()

        # Every unordered pair of distinct species, normalised by sorting.
        found_species = getSpecies(lowered, bacterial_species_base)
        species_pairs = {
            tuple(sorted([first, second]))
            for first in found_species
            for second in found_species
            if first != second
        }

        # One space-joined string per element returned by the tokenizer.
        sentences = [" ".join(tokens) for tokens in st.preprocess(lowered)]

        hits = []
        misses = []
        for species_a, species_b in species_pairs:
            # Bind both species into every pattern before matching.
            for pattern in positive_patterns:
                print(pattern.text)
                pattern.initialize(species_a, species_b)
            for pattern in negative_patterns:
                pattern.initialize(species_a, species_b)

            # Diagnostic output for the first pattern.
            print(species_a)
            print(species_b)
            print(positive_patterns[0].regexes)
            print(positive_patterns[0].pCheck(sentences))

            # Keep only the non-empty match results.
            pattern_hits = [
                pattern.pCheck(sentences) for pattern in positive_patterns
            ]
            hits.extend([found for found in pattern_hits if found != []])
            pattern_misses = [
                pattern.pCheck(sentences) for pattern in negative_patterns
            ]
            misses.extend([found for found in pattern_misses if found != []])

        print("hits: ", hits)
        print("misses: ", misses)
        if len(hits) >= cutoff and not misses:
            accepted.append((sentences, len(hits)))
    return accepted
def debug(abstract):
	"""Print diagnostics for the first single-line pattern on a fixed sample.

	The ``abstract`` argument is not used; a hard-coded two-sentence sample
	text is checked instead. The function never returns normally: it ends
	with a bare ``raise``, which re-raises the exception currently being
	handled or, if there is none, raises ``RuntimeError``.
	"""
	probe = makePatterns(patterns)[0]
	probe.initialize("a aaa", "b bbb")

	sample = "Activity of a aaa against b bbb. The a aaa against c is.".lower()
	sentences = [" ".join(tokens) for tokens in st.preprocess(sample)]

	print(sentences)
	print(probe.regexes)
	print(probe.pCheck(sentences))

	# Drill into the first sentence and the first regex individually.
	opening = sentences[0]
	print(probe.check(opening))
	first_regex = probe.regexes[0]
	print(first_regex)
	print(opening)
	print(first_regex.search(opening))
	raise
def makePatterns(patternStringList):
	"""Turn raw pattern strings into ``Pattern`` objects.

	Each string is passed through ``st.preprocess`` (only element ``[0]``
	of the result is kept), its tokens are stemmed with the module-level
	``stemmer``, and the stems are joined with single spaces before being
	handed to ``Pattern``.

	Args:
		patternStringList: iterable of pattern strings.

	Returns:
		A list of ``Pattern`` objects, one per input string, in input order.
	"""
	tokenized = [st.preprocess(raw)[0] for raw in patternStringList]

	stemmed = []
	for tokens in tokenized:
		stemmed.append([stemmer.stem(token) for token in tokens])

	texts = [" ".join(tokens) for tokens in stemmed]
	return [Pattern(text) for text in texts]
Ejemplo n.º 7
0
def makeNpatterns(nPatternStringList):
	"""Create an ``nPattern`` for every group of pattern strings.

	All strings are first run through ``st.preprocess`` (keeping element
	``[0]`` of each result), then every token is stemmed with the
	module-level ``stemmer``, then the stems of each string are joined with
	single spaces.

	Args:
		nPatternStringList: iterable of groups, each an iterable of
			pattern strings.

	Returns:
		A list of ``nPattern`` objects, one per group, in input order; each
		receives the list of normalised strings of its group.
	"""
	# Tokenize everything before any stemming happens.
	token_groups = [
		[st.preprocess(pattern_string)[0] for pattern_string in group]
		for group in nPatternStringList
	]

	# Stem word by word, preserving the group/string nesting.
	stem_groups = []
	for group in token_groups:
		stemmed_strings = []
		for words in group:
			stemmed_strings.append([stemmer.stem(word) for word in words])
		stem_groups.append(stemmed_strings)

	# Collapse each stem list to one string, then wrap each group.
	text_groups = [[" ".join(stems) for stems in group] for group in stem_groups]
	return [nPattern(texts) for texts in text_groups]
Ejemplo n.º 8
0
def makePatterns(patternStringList):
	"""Build a ``Pattern`` for every raw pattern string.

	Processing runs in three passes over the whole input: ``st.preprocess``
	on each string (keeping element ``[0]`` of the result), stemming of
	every token with the module-level ``stemmer``, and joining the stems
	with single spaces.

	Args:
		patternStringList: iterable of pattern strings.

	Returns:
		A list of ``Pattern`` objects in the same order as the input.
	"""
	# Pass 1: tokenize.
	first_elements = []
	for pattern_string in patternStringList:
		first_elements.append(st.preprocess(pattern_string)[0])

	# Pass 2: stem.
	stem_lists = [[stemmer.stem(word) for word in words] for words in first_elements]

	# Pass 3: join and wrap.
	pattern_texts = [" ".join(stems) for stems in stem_lists]
	built = []
	for text in pattern_texts:
		built.append(Pattern(text))
	return built
Ejemplo n.º 9
0
	def tokStem(self, paragraph):
		"""Tokenize ``paragraph`` and return its stemmed, re-joined pieces.

		``paragraph`` and ``self.spSet`` are passed to ``st.preprocess``;
		every token of every element it returns (presumably one element
		per sentence; confirm against ``st.preprocess``) is stemmed with
		the module-level ``stemmer``.

		Returns:
			A list with one space-joined string of stems per element
			returned by ``st.preprocess``.
		"""
		stemmed_pieces = []
		for tokens in st.preprocess(paragraph, self.spSet):
			stemmed_pieces.append([stemmer.stem(token) for token in tokens])
		return [" ".join(stems) for stems in stemmed_pieces]
Ejemplo n.º 10
0
    for i in f:
        spSet.add(i.strip().lower())
        spSet.add(abb(i.strip().lower()))

# One shared English Snowball stemmer for every pattern below.
stemmer = SnowballStemmer("english")

# Single-line patterns. ``sjA`` / ``sjB`` are presumably placeholders for
# the two species of a pair (confirm against Pattern.initialize).
patterns = [
    "Activity sjA against sjB",
    "containing sjA inhibited sjB",
    "sjA decreased sjB",
    "bacteriocin sjA sjB",
    "sjA compet sjB",
    "Antagonistic sjA on sjB",
    "sjA antimicrobial sjB",
    "sjA antibacterial sjB",
    "sjA antagonistic sjB",
    "sjA bacteriocin sjB",
    "sjA inhibit sjB",
    "inhibit sjA by sjB",
]

# Normalise in three passes: tokenize (first preprocessed element only),
# stem each token, re-join with spaces; then wrap each text in a Pattern.
patterns = [st.preprocess(raw)[0] for raw in patterns]
patterns = [[stemmer.stem(token) for token in tokens] for tokens in patterns]
patterns = [" ".join(tokens) for tokens in patterns]
compiled = [Pattern(text) for text in patterns]

# Multi-line patterns: each entry is a group of strings handled together
# by one nPattern, normalised the same way as the single-line patterns.
nPatterns = [["bacteriocin produced sjA", "inhibit sjB"]]
nPatterns = [[st.preprocess(raw)[0] for raw in group] for group in nPatterns]
nPatterns = [
    [[stemmer.stem(token) for token in tokens] for tokens in group]
    for group in nPatterns
]
nPatterns = [[" ".join(tokens) for tokens in group] for group in nPatterns]
compiled.extend([nPattern(group) for group in nPatterns])

# Anti-patterns: currently none, so both passes below yield empty lists.
antiPatterns = []
antiPatterns = [st.preprocess(raw)[0] for raw in antiPatterns]
antiPatterns = [
    [stemmer.stem(token) for token in tokens] for tokens in antiPatterns
]
Ejemplo n.º 11
0
#!/usr/bin/env python3

from modules import sent_tokenize as st

# Lower-cased sample abstract used to eyeball the tokenizer output.
tst = (
    "colonic colonization of clostridium spp. is associated with "
    "accumulation of tregs, which inhibits development of inflammatory "
    "lesions. to investigate whether infection with the clostridium "
    "leptum sp. can specifically induce tregs and/or tdcs bone "
    "marrow-derived dendritic cells were cultured in the presence or "
    "absence of c. leptum then co-cultured with cd4(+)cd25(-) t cells "
    "or not"
)
# Print whatever st.preprocess returns for the sample.
print(st.preprocess(tst))
Ejemplo n.º 12
0
# One shared English Snowball stemmer for every pattern below.
stemmer = SnowballStemmer("english")

# Single-line patterns. ``sjA`` / ``sjB`` are presumably placeholders for
# the two species of a pair (confirm against the pattern class).
patterns = [
    "Activity sjA against sjB",
    "containing sjA inhibited sjB",
    "sjA decreased sjB",
    "bacteriocin sjA sjB",
    "sjA compet sjB",
    "Antagonistic sjA on sjB",
    "sjA antimicrobial sjB",
    "sjA antibacterial sjB",
    "sjA antagonistic sjB",
    "sjA bacteriocin sjB",
    "sjA inhibit sjB",
    "inhibit sjA by sjB",
]

# Normalise in three passes: tokenize (first preprocessed element only),
# stem each token, re-join with spaces.
patterns = [st.preprocess(raw)[0] for raw in patterns]
patterns = [[stemmer.stem(token) for token in tokens] for tokens in patterns]
patterns = [" ".join(tokens) for tokens in patterns]
# NOTE(review): the class is referenced as lowercase ``pattern`` here;
# confirm this matches the name that is actually imported.
compiled = [pattern(text) for text in patterns]

# Multi-line patterns are disabled in this script; kept for reference.
# nPatterns = [
# ["bacteriocin produced sjA", "inhibit sjB"]
# ]
# nPatterns = [[st.preprocess(i)[0] for i in j] for j in nPatterns]
# nPatterns = [[[stemmer.stem(i)for i in j] for j in k] for k in nPatterns]
# nPatterns = [[" ".join(i) for i in j] for j in nPatterns]
# compiled += [nPattern(i) for i in nPatterns]

print("------PATTERNS------")
# Plain loop: printing is a side effect, so no throwaway list is built.
for compiled_pattern in compiled:
    print(compiled_pattern.text)
# print("nPatterns: ", nPatterns)