def __init__(self, filePath):
    """Load the species names and the papers stored in ``filePath``.

    Args:
        filePath: Path passed to ``pp.getNames`` and ``pp.spFile``.

    Sets the following attributes:
        spSet1, spSet2: Sets built from the first and second entries
            returned by ``pp.getNames`` (presumably the name variants of
            each of the two species -- confirm against ``pp``).
        spSet: Union of ``spSet1`` and ``spSet2``.
        spFile: Object returned by ``pp.spFile(filePath, purge=True)``.
        unified: List of ``(raw_paper, Paper)`` tuples, one per entry of
            ``spFile.papers``.
    """
    # Initialize the species sets. The names are read once and the result
    # is reused for both species instead of parsing the file twice.
    names = pp.getNames(filePath)
    sja, sjb = names[0], names[1]
    self.spSet1 = set(sja)
    self.spSet2 = set(sjb)
    self.spSet = self.spSet1.union(self.spSet2)

    # Load the papers.
    self.spFile = pp.spFile(filePath, purge=True)

    # Each entry of unified is a tuple: (spFile.papers[i], Paper).
    self.unified = [
        (raw, Paper(raw, self.spSet)) for raw in self.spFile.papers
    ]
def __init__(self, filePath, terms):
    """Load the species names and papers from ``filePath``, keyed by ``terms``.

    Args:
        filePath: Path passed to ``pp.getNames`` and ``pp.loadFile``.
        terms: Colon-separated string of field tags (e.g. ``"ti:ab"``).
            Each tag is lower-cased and right-padded with spaces to a
            width of four characters.

    Sets the following attributes:
        terms: List of normalized (lower-cased, padded) tags.
        spSet1, spSet2: Sets built from the first and second entries
            returned by ``pp.getNames``.
        spSet: Union of ``spSet1`` and ``spSet2``.
        rawPapers: Whatever ``pp.loadFile(filePath)`` returns.
        papers: List of ``Paper`` objects, each built from a dict mapping
            the normalized tags to the corresponding fields produced by
            ``self.paperSplit``.
    """
    # Pad the tags with spaces so every tag is at least four characters wide.
    self.terms = [tag.lower().ljust(4) for tag in terms.split(':')]
    print(self.terms)

    # Initialize the species sets. The names are read once and the result
    # is reused for both species instead of parsing the file twice.
    names = pp.getNames(filePath)
    sja, sjb = names[0], names[1]
    self.spSet1 = set(sja)
    self.spSet2 = set(sjb)
    self.spSet = self.spSet1.union(self.spSet2)

    # Load the papers.
    self.rawPapers = pp.loadFile(filePath)

    # Split the raw text into per-paper field sequences, pair every field
    # with its tag (zip stops at the shorter of the two), and wrap each
    # resulting dict in a Paper.
    split_papers = self.paperSplit(self.rawPapers)
    term_dicts = [dict(zip(self.terms, fields)) for fields in split_papers]
    self.papers = [Paper(one_paper, self.spSet) for one_paper in term_dicts]
def debug(filePath):
    """Print a quick diagnostic for the first paper found in ``filePath``.

    Builds the patterns from ``nPatterns``, initializes them with the first
    name of each of the two subjects, loads the papers through ``Pair`` and
    prints, in order: the first raw paper, the ``sAbstract`` of its ``Paper``
    object, and the result of running the first pattern's ``pCheck`` on that
    abstract.

    Args:
        filePath: Path passed to ``pp.getNames`` and ``Pair``.
    """
    # Extraction of subject names: first name listed for each subject.
    subjects = pp.getNames(filePath)
    first_subject = subjects[0][0]
    second_subject = subjects[1][0]

    checks = makeNpatterns(nPatterns)
    for check in checks:
        check.initialize(first_subject, second_subject)

    loaded = Pair(filePath)
    print(loaded.spFile.papers[0])

    # unified holds (raw paper, Paper) tuples; index 1 is the Paper object.
    first_paper = loaded.unified[0][1]
    print(first_paper.sAbstract)
    print(checks[0].pCheck(first_paper.sAbstract))
def execute(filePath, out_dir=""):
    """Run every pattern against the papers in ``filePath``.

    The output file is named ``<stemA>#<stemB>.json`` (spaces in the stemmed
    subject names replaced by underscores) and placed inside ``out_dir``.

    Args:
        filePath: Path passed to ``pp.getNames`` and ``Pair``.
        out_dir: Directory the output path is joined onto; defaults to the
            empty string, which yields a bare file name.
    """
    # Extraction of subject names: stem the first name of each subject.
    subjects = pp.getNames(filePath)
    stem_a = stemmer.stem(subjects[0][0])
    stem_b = stemmer.stem(subjects[1][0])

    file_name = "{}#{}.json".format(
        stem_a.replace(" ", "_"),
        stem_b.replace(" ", "_"),
    )
    destination = os.path.join(out_dir, file_name)

    # Patterns to match, followed by the ones built from nPatterns.
    matchers = makePatterns(patterns)
    matchers += makeNpatterns(nPatterns)
    anti_matchers = makePatterns(antiPatterns)

    # Bind both subject stems into every pattern, regular ones first.
    for group in (matchers, anti_matchers):
        for matcher in group:
            matcher.initialize(stem_a, stem_b)

    Pair(filePath).testAll(matchers, anti_matchers, destination)
def execute(filePath, postOutDir=""):
    """Run every pattern against the papers in ``filePath``.

    The output path is the module-level ``outDir`` followed by
    ``postOutDir`` (with a trailing ``/`` appended when it is non-empty and
    lacks one) and the name produced by ``makeName`` from the unstemmed
    first names of the two subjects.

    Args:
        filePath: Path passed to ``pp.getNames`` and ``Pair``.
        postOutDir: Optional sub-directory appended to ``outDir``.
    """
    # Extraction of subject names: stem the first name of each subject.
    subjects = pp.getNames(filePath)
    stem_a = stemmer.stem(subjects[0][0])
    stem_b = stemmer.stem(subjects[1][0])

    # Creation of the output path: make sure the sub-directory ends in "/".
    if postOutDir and not postOutDir.endswith("/"):
        postOutDir += "/"
    destination = outDir + postOutDir + makeName(subjects[0][0], subjects[1][0])

    # Patterns to match, followed by the ones built from nPatterns.
    matchers = makePatterns(patterns)
    matchers += makeNpatterns(nPatterns)
    anti_matchers = makePatterns(antiPatterns)

    # Bind both subject stems into every pattern, regular ones first.
    for group in (matchers, anti_matchers):
        for matcher in group:
            matcher.initialize(stem_a, stem_b)

    Pair(filePath).testAll(matchers, anti_matchers, destination)