예제 #1
0
	def __init__(self, filePath):
		"""Load the name sets and the papers stored in *filePath*.

		Attributes set:
			spSet1, spSet2: sets built from the first and second entries
				returned by ``pp.getNames(filePath)``.
			spSet: union of ``spSet1`` and ``spSet2``.
			spFile: ``pp.spFile(filePath, purge = True)``.
			unified: list of ``(raw paper, Paper)`` tuples, one per entry of
				``spFile.papers``.
		"""
		# Look the names up once and reuse the result for both sets.
		names = pp.getNames(filePath)
		self.spSet1 = set(names[0])
		self.spSet2 = set(names[1])
		self.spSet = self.spSet1 | self.spSet2

		# Load the papers.
		self.spFile = pp.spFile(filePath, purge = True)
		# Keep each raw paper next to the Paper object wrapping it.
		self.unified = [
			(rawPaper, Paper(rawPaper, self.spSet))
			for rawPaper in self.spFile.papers
		]
예제 #2
0
	def __init__(self, filePath, terms):
		"""Load the name sets and the papers stored in *filePath*.

		Args:
			filePath: path handed to ``pp.getNames`` and ``pp.loadFile``.
			terms: colon-separated tag names, e.g. ``"ti:ab"``.

		Attributes set:
			terms: the tags, lower-cased and space-padded to four characters.
			spSet1, spSet2: sets built from the first and second entries
				returned by ``pp.getNames(filePath)``.
			spSet: union of ``spSet1`` and ``spSet2``.
			rawPapers: result of ``pp.loadFile(filePath)``.
			papers: list of ``Paper`` objects, each built from a dict that maps
				a tag to the matching field of one split paper.
		"""
		# Lower-case every tag and right-pad it with spaces to width 4;
		# tags that are already four characters or longer are left unpadded.
		self.terms = [tag.lower().ljust(4) for tag in terms.split(':')]
		print(self.terms)

		# Look the names up once and reuse the result for both sets.
		names = pp.getNames(filePath)
		self.spSet1 = set(names[0])
		self.spSet2 = set(names[1])
		self.spSet = self.spSet1 | self.spSet2

		# Load the papers.
		self.rawPapers = pp.loadFile(filePath)
		# Pair each tag with the corresponding field of a split paper (zip
		# stops at the shorter of the two) and wrap the mapping in a Paper.
		self.papers = [
			Paper(dict(zip(self.terms, fields)), self.spSet)
			for fields in self.paperSplit(self.rawPapers)
		]
예제 #3
0
def debug(filePath):
	"""Print diagnostic output for the first paper found in *filePath*.

	Prints, in order: the first raw paper, the ``sAbstract`` of the first
	unified ``Paper``, and the result of running the first pattern's
	``pCheck`` on that abstract.
	"""
	# The first name of each of the two entries is used as a subject.
	subjectNames = pp.getNames(filePath)
	sja = subjectNames[0][0]
	sjb = subjectNames[1][0]

	# Only the patterns built from nPatterns are exercised here.
	patternList = makeNpatterns(nPatterns)
	for candidate in patternList:
		candidate.initialize(sja, sjb)

	pairPapers = Pair(filePath)
	print(pairPapers.spFile.papers[0])
	firstPaper = pairPapers.unified[0][1]
	print(firstPaper.sAbstract)
	print(patternList[0].pCheck(firstPaper.sAbstract))
예제 #4
0
def execute(filePath, out_dir=""):
    """Test every pattern against the papers in *filePath*.

    The output path is ``<a>#<b>.json`` inside *out_dir*, where ``<a>`` and
    ``<b>`` are the stemmed subject names with spaces replaced by
    underscores. That path is passed to ``Pair.testAll`` together with the
    pattern and anti-pattern lists.
    """
    # The first name of each of the two entries, stemmed, is a subject.
    names = pp.getNames(filePath)
    sja, sjb = [stemmer.stem(names[idx][0]) for idx in (0, 1)]

    # Build the output path from the underscore-joined subject names.
    safe_a = sja.replace(" ", "_")
    safe_b = sjb.replace(" ", "_")
    out_path = os.path.join(out_dir, "{}#{}.json".format(safe_a, safe_b))

    patternList = makePatterns(patterns)
    patternList += makeNpatterns(nPatterns)
    antiPatternList = makePatterns(antiPatterns)

    # Initialise the patterns first, then the anti-patterns.
    for group in (patternList, antiPatternList):
        for pattern in group:
            pattern.initialize(sja, sjb)

    Pair(filePath).testAll(patternList, antiPatternList, out_path)
예제 #5
0
def execute(filePath, postOutDir = ""):
	"""Test every pattern against the papers in *filePath*.

	The output path is ``outDir + postOutDir + makeName(...)``; a trailing
	"/" is appended to a non-empty *postOutDir* that lacks one. That path is
	passed to ``Pair.testAll`` together with the pattern and anti-pattern
	lists.
	"""
	# The first name of each of the two entries, stemmed, is a subject.
	names = pp.getNames(filePath)
	sja = stemmer.stem(names[0][0])
	sjb = stemmer.stem(names[1][0])

	# Build the output path; the file name uses the unstemmed names.
	if postOutDir and not postOutDir.endswith("/"):
		postOutDir = postOutDir + "/"
	outPath = outDir + postOutDir + makeName(names[0][0], names[1][0])

	patternList = makePatterns(patterns)
	patternList += makeNpatterns(nPatterns)
	antiPatternList = makePatterns(antiPatterns)

	# Initialise the patterns first, then the anti-patterns.
	for group in (patternList, antiPatternList):
		for pattern in group:
			pattern.initialize(sja, sjb)

	Pair(filePath).testAll(patternList, antiPatternList, outPath)