def exploreArticle(query):
    """Search for *query* and fetch the exploreArticle page of the first hit.

    The query is sent to the search service. Every label in the extracted
    search result is mapped to the article ID of its first sense, or to the
    placeholder ``'no_matched_article'`` when the label has no senses. The
    first value of that mapping is turned into an exploreArticle URL with
    ``wmapiurl.expArtURL`` and that URL is fetched.

    Returns whatever ``wmapifetch.fetchURL`` returns for the article URL.

    Raises IndexError when the search result contains no labels.
    """
    import xmlextract
    import wmapiurl
    import wmapifetch

    search_url = wmapiurl.searchURL(query)
    search_xml = wmapifetch.fetchURL(search_url)
    search_result = xmlextract.extractSearch(search_xml)

    # Map each label name to the article ID of its first sense.
    label_ids = {}
    for label in search_result:
        senses = label['labelSenses']
        if len(senses) != 0:
            label_ids[label['labelName']] = senses[0]['artID']
        else:
            label_ids[label['labelName']] = 'no_matched_article'

    # Keep the exception type the old ``idlist[0]`` lookup produced, but
    # say what actually went wrong.
    if not label_ids:
        raise IndexError('search for %r returned no labels' % (query,))

    # First value in dict iteration order (insertion order on Python 3.7+).
    # NOTE(review): this can be the 'no_matched_article' placeholder when the
    # first label has no senses; it is passed on unchanged, as before.
    identity = next(iter(label_ids.values()))

    article_url = wmapiurl.expArtURL(identity)
    # Fetch the URL built above. The previous code passed the bare article
    # ID to fetchURL and left the constructed URL unused.
    return wmapifetch.fetchURL(article_url)
def fetchDefinition(artid):
    """Get the entry summary for the article with the given identifier.

    Builds an exploreArticle request (``definition=true``,
    ``definitionLength=LONG``) for ``str(artid)``, fetches it through
    ``wmapifetch.fetchURL`` and parses the response with
    ``xml.dom.minidom.parse``.

    Returns the ``data`` of the first child node of the first
    ``<definition>`` element, or ``''`` when there is no such element or it
    has no child nodes.
    """
    import xml.dom.minidom
    import wmapifetch

    service = 'http://wikipedia-miner.cms.waikato.ac.nz/services/exploreArticle?definition=true&definitionLength=LONG&id='
    response = wmapifetch.fetchURL(service + str(artid))

    # Walk from the document root down to the <definition> elements.
    root = xml.dom.minidom.parse(response).documentElement
    matches = root.getElementsByTagName('definition')
    if len(matches) == 0:
        return ''

    children = matches[0].childNodes
    if len(children) == 0:
        return ''

    # Only the first child node is read.
    return children[0].data
def searchWiki(query):
    """Search for *query* and return a dict of label name -> article ID.

    Each label in the extracted search result contributes one entry: the
    ``artID`` of its first sense, or the string ``'no_matched_article'``
    when the label has no senses.
    """
    import wmapiurl  # Depending modules
    import wmapifetch
    import xmlextract

    response = wmapifetch.fetchURL(wmapiurl.searchURL(query))

    matches = {}  # label name -> article ID
    for entry in xmlextract.extractSearch(response):
        senses = entry['labelSenses']
        if len(senses) == 0:
            matches[entry['labelName']] = 'no_matched_article'
            continue
        name = entry['labelName']
        matches[name] = senses[0]['artID']
    return matches
def article(identity, infotype):
    """Fetch the exploreArticle data for *identity* and return one section.

    *infotype* is compared case-insensitively and selects which key of the
    extracted result is returned:

        'l' -> 'labels'
        'p' -> 'parentCategories'
        'i' -> 'inLinks'
        'o' -> 'outLinks'

    Any other value yields ``False``.
    """
    import wmapiurl
    import wmapifetch
    import xmlextract

    response = wmapifetch.fetchURL(wmapiurl.expArtURL(identity))
    extracted = xmlextract.extractExplore(response)

    # Selector letter -> key in the extracted result.
    sections = {
        'l': 'labels',
        'p': 'parentCategories',
        'i': 'inLinks',
        'o': 'outLinks',
    }
    section = sections.get(infotype.lower())
    if section is None:
        return False
    return extracted[section]
def category(identity, infotype):
    """Fetch the category data for *identity* and return an ID -> title dict.

    *infotype* is compared case-insensitively and selects the exact info
    you want: parent categories ('p') or child categories ('c'). The
    matching list in the extracted result is turned into a dict keyed by
    category ID with the category title as value.

    Any other value of *infotype* yields ``False``.
    """
    import wmapiurl
    import wmapifetch
    import xmlextract

    response = wmapifetch.fetchURL(wmapiurl.expCatURL(identity))
    extracted = xmlextract.extractCategory(response)

    # Selector letter -> (list key, ID field, title field).
    layouts = {
        'p': ('parentCategories', 'parentCategoryID', 'parentCategoryTitle'),
        'c': ('childCategories', 'childCategoryID', 'childCategoryTitle'),
    }
    layout = layouts.get(infotype.lower())
    if layout is None:
        return False

    list_key, id_field, title_field = layout
    return {item[id_field]: item[title_field] for item in extracted[list_key]}