Exemple #1
0
 def SearchCategorymember(self,categoryname):
     """Populate the result lists with the members of a wiki category.

     Both ``subcategoryresultlist`` and ``pageresultlist`` are cleared, the
     ``categorymembers`` API list is queried for ``Category:<categoryname>``
     and each returned member is routed as follows:

     * title starting with ``Category:`` -> the title without that prefix
       is added to ``subcategoryresultlist``;
     * any other title containing ':' -> skipped;
     * everything else -> (pageid, title) is added to ``pageresultlist``.

     Args:
         categoryname: category title without the ``Category:`` prefix; it
             is interpolated into the query string as-is.

     Any exception raised while querying or filling the lists is caught and
     its text is shown in field 0 of the status bar.
     """
     category_prefix = 'Category:'
     try:
         self.subcategoryresultlist.DeleteAllItems()
         self.pageresultlist.DeleteAllItems()
         wikiextractor = Extractor.wikiextractor()
         query = configuration.api_url_zh + (
             '&list=categorymembers&cmtitle=Category:%s&cmsort=timestamp&'
             'cmdir=desc&cmlimit=max' % categoryname)
         json_content = wikiextractor.getjson(query)

         members = json_content['query']['categorymembers']

         for member in members:
             # TODO: without a Category attribute we cannot tell whether
             # the member is a subcategory (?)
             title = member['title']

             if title.startswith(category_prefix):
                 # Slice the prefix off.  str.lstrip() takes a set of
                 # characters, so lstrip('Category:') would also eat leading
                 # letters of the real name ("Category:Cats" -> "s").
                 subcategory = title[len(category_prefix):]
                 index = self.subcategoryresultlist.InsertStringItem(sys.maxint, subcategory)
                 self.subcategoryresultlist.SetStringItem(index, 0, subcategory)
                 continue

             # TODO: to be refined
             # A ':' in the title indicates this is not a valid page.
             if ':' in title:
                 continue

             pageid = str(member['pageid'])
             index = self.pageresultlist.InsertStringItem(sys.maxint, pageid)
             self.pageresultlist.SetStringItem(index, 0, pageid)
             self.pageresultlist.SetStringItem(index, 1, title)
     except Exception as e:
         # e.message is deprecated and empty for multi-argument exceptions;
         # fall back to str(e) so the status bar is never blanked on failure.
         self.statusbar.SetStatusText(getattr(e, 'message', '') or str(e), 0)
Exemple #2
0
 def OnGeoExtract(self,evt):
     """Extract data for the pages from ``GetGeoList()`` and save it to SQLite.

     The page ids returned by ``self.GetGeoList()`` are passed to
     ``get_data_dict_from_pageid`` (with flag ``'f'``), which fills
     ``data_dict``; the dict is then written with ``SaveToSQLite``.  On
     success a confirmation message is shown in field 0 of the status bar.

     Args:
         evt: event object supplied by the caller; not used here.

     Any exception is caught and its text is shown in the status bar.
     """
     try:
         wikiextractor = Extractor.wikiextractor()
         data_dict = {}
         members = self.GetGeoList()
         wikiextractor.get_data_dict_from_pageid(members, data_dict, 'f')

         filewriter = FileWriter.filewriter()
         # The Excel export (SaveToExcel) is intentionally not used here.
         filewriter.SaveToSQLite(data_dict)
         # Unicode literal, matching OnExtract, so the non-ASCII text is not
         # passed to wx as raw bytes.
         # NOTE(review): the message still mentions an Excel file although
         # the data is saved to SQLite -- confirm the intended wording.
         self.statusbar.SetStatusText(u"保存成功,请检查excel文件", 0)
     except Exception as e:
         # e.message is deprecated and empty for multi-argument exceptions;
         # fall back to str(e) so a failure is always visible.
         self.statusbar.SetStatusText(getattr(e, 'message', '') or str(e), 0)
Exemple #3
0
 def OnExtract(self,evt):
     """Extract the members of the entered category and save them to Excel.

     Reads the category name from ``self.categoryname``, calls
     ``parse_members`` with flag ``'t'`` when the ``extractsubcategoryck``
     checkbox is checked and ``'f'`` otherwise, then writes the collected
     ``data_dict`` with ``SaveToExcel``.  On success a confirmation message
     is shown in field 0 of the status bar.

     Args:
         evt: event object supplied by the caller; not used here.

     Any exception is caught and its text is shown in the status bar.
     """
     try:
         categoryname = self.categoryname.GetValue()
         wikiextractor = Extractor.wikiextractor()
         data_dict = {}

         include_subcategories = (
             self.extractsubcategoryck.Get3StateValue() == wx.CHK_CHECKED)
         flag = 't' if include_subcategories else 'f'
         wikiextractor.parse_members(categoryname, data_dict, flag)

         filewriter = FileWriter.filewriter()
         # The SQLite export (SaveToSQLite) is intentionally not used here.
         filewriter.SaveToExcel(data_dict)
         self.statusbar.SetStatusText(u"保存成功,请检查excel文件", 0)
     except Exception as e:
         # e.message is deprecated and empty for multi-argument exceptions;
         # fall back to str(e) so a failure is always visible.
         self.statusbar.SetStatusText(getattr(e, 'message', '') or str(e), 0)
Exemple #4
0
 def SearchbyPrex(self,prex):
     """List every category whose name starts with ``prex``.

     Clears ``subcategoryresultlist``, queries the ``allcategories`` API
     list with ``acprefix=<prex>`` and adds the ``'*'`` value of each
     returned entry to the list.

     Args:
         prex: category-name prefix; it is interpolated into the query
             string as-is.

     Any exception is caught and its text is shown in field 0 of the
     status bar.
     """
     try:
         self.subcategoryresultlist.DeleteAllItems()

         wikiextractor = Extractor.wikiextractor()
         query = configuration.api_url_zh + '&list=allcategories&acprefix=%s' % prex
         json_content = wikiextractor.getjson(query)
         members = json_content['query']['allcategories']

         for member in members:
             # TODO: without a Category attribute we cannot tell whether
             # the member is a subcategory (?)
             category = member['*']
             index = self.subcategoryresultlist.InsertStringItem(sys.maxint, category)
             self.subcategoryresultlist.SetStringItem(index, 0, category)
     except Exception as e:
         # e.message is deprecated and empty for multi-argument exceptions;
         # fall back to str(e) so a failure is always visible.
         self.statusbar.SetStatusText(getattr(e, 'message', '') or str(e), 0)
Exemple #5
0
 def SearchbyGeo(self, lat, lon,primay):
     try:
         self.geopageresultlist.DeleteAllItems();
         sourcelist = self.GetSourceList()
         
         wikiextractor = Extractor.wikiextractor()            
         query =""
         queries = {}
         pagedatalist=[]
         for source in sourcelist:
             lastcount =-1
             geopagelist = []
             while(len(geopagelist)<100):                    
                 while True:     
                     if primay:
                         query = source + '&list=geosearch&gscoord=%s|%s&gsradius=10000&gsglobe=earth&gsnamespace=0&gslimit=500&gsprop=dim&gsprimary=primary' %(lat,lon)
                     else:
                          query = source + '&list=geosearch&gscoord=%s|%s&gsradius=10000&gsglobe=earth&gsnamespace=0&gslimit=500&gsprop=dim&gsprimary=all' %(lat,lon)
                     
                     json_content = wikiextractor.getjson(query)
                     queries =  json_content['query']
                     nowcount =len(geopagelist)
                     if('geosearch' in queries.keys() and len(queries['geosearch'])>0):
                         break;
                     
                     lat+=0.003;
                     lon+=0.003;
 
                 if(nowcount==lastcount):
                      break;
                  
                 lastcount=len(geopagelist)   
                 pages = queries['geosearch']
                 for page in pages:
                     try:
                         #TODO:如果没有Category属性,无法判断是否为子类(?)                   
                         lat = page['lat']
                         lon = page['lon']
                         pageid = page['pageid']
                         strpageid=str(pageid).decode('utf-8')
                         title = page['title'].decode('utf8')
                         strlat = str(page['lat']).decode('utf-8')
                         strlon = str(page['lat']).decode('utf-8')
                         strdim=str(page['dim']).decode('utf-8')
                         if(pageid in self.geopagelist):
                             continue
                         else:
                             geopagelist.append(pageid)                     
                         #测试写入查询结果信息
                         pagedata = {}
                         pagedata[u'文章ID']= pageid
                         pagedata[u'标题']= title                        
                         pagedata[u'经度']= lon
                         pagedata[u'纬度']= lat
                         pagedata[u'大小']=  strdim
                         pagedatalist.append(pagedata);
                         
                         index = self.geopageresultlist.InsertStringItem(sys.maxint, strpageid)
                         self.geopageresultlist.SetStringItem(index, 0, strpageid)    
                         self.geopageresultlist.SetStringItem(index, 1, title)    
                         self.geopageresultlist.SetStringItem(index, 2, strlat)    
                         self.geopageresultlist.SetStringItem(index, 3, strlon)    
                         self.geopageresultlist.SetStringItem(index, 4, strdim)                
                     except Exception,e:            
                         continue
                                  
         #最后存储查询结果
         self.resultdict[u'查询结果']=pagedatalist