def search(path):
    """Recursively scan *path* and process every file with a known extension.

    For each regular file whose name ends in '.' + one of the module-level
    ``types``:
      * names ending in 'jpg' whose first character is a digit are appended
        to ``gysimg`` (once per name);
      * the file path is passed through T.sub(fp, basedir, ''), its first
        character is dropped and backslashes are turned into '/';
      * the ``sfr`` template is filled with that name and with
        'fr_' + T.varname(name), and the resulting source text is exec'd;
      * both values are printed.
    Sub-directories are searched recursively.

    NOTE(review): the original line breaks were lost. The nesting below is
    the most plausible reading -- in particular that exec/print/break run for
    every matching file, not only for the digit-prefixed jpgs. Confirm.
    """
    # Hoisted: the original rebuilt this list for every candidate file.
    leading_digits = '1,2,3,4,5,6,7,8,9,0'.split(',')
    for filename in os.listdir(path):
        fp = os.path.join(path, filename)
        if os.path.isfile(fp):
            for i in types:
                if not filename.endswith('.' + i):
                    continue
                if (filename.endswith('jpg')
                        and filename[:1] in leading_digits
                        and filename not in gysimg):
                    gysimg.append(filename)
                filename = T.sub(fp, basedir, '')[1:].replace('\\', '/')
                funcname = 'fr_' + T.varname(filename)
                # exec runs generated source text: only safe while the
                # scanned tree and `sfr` are trusted. Local names are left
                # unchanged because the exec'd text may refer to them.
                exec(sfr.format(filename, funcname))
                print('%s %s' % (filename, funcname))
                # One extension matched; no need to test the others.
                break
        elif os.path.isdir(fp):
            search(fp)
# NOTE(review): the statements on the next line were flattened onto a single
# line, and because that line starts with '#', Python treats all of it as a
# comment -- none of it runs as written.
# Read as code, it appears to be the body of a loop over elements `j` (the
# loop header is not visible here):
#   * an element whose class contains 'in-reply-to' supplies `rto` (the
#     child with class "ng-binding"), then `continue`;
#   * an element whose href contains 'http://www.zhihu.com/people/' supplies
#     `href` and `name`, then `continue`;
#   * `href` and `rto` are passed through T.sub(..., '/people/'), '-' and '.'
#     are removed, "href->rto" (or just href) is printed and (href, rto) is
#     appended to `ls`.
# The trailing statements print '}', call U.resetOut() and print every entry
# of `ls`.
# TODO(review): restore the original line breaks/indentation before relying
# on this code.
# print j.get('class') if(str(j.get('class')).find(u'in-reply-to')>0): rto=j.find(attrs={"class":"ng-binding"}) # rto=rto.get('href') # rto=str(rto) if(len(rto)<1):rto=None continue if(str(j.get('href')).find('http://www.zhihu.com/people/')!=-1): href=j.get('href');href=str(href) name=j.text.encode('gbk') continue # print name # if href not in ls:ls.append(href) href=T.sub(href,'/people/') rto=T.sub(rto,'/people/') href=href.replace('-','').replace('.','') if(rto!=None): rto=rto.replace('-','').replace('.','') print '%s->%s'%(href,rto) else: print href ls.append((href,rto)) print '}' U.resetOut() for j in ls : print j
#coding=utf-8
# Script header: URL constants, imports and command-line handling for the
# zhihu answer scraper.
# NOTE(review): the original line breaks were lost (everything followed the
# coding comment on one line, so none of it executed). The layout below is
# the most plausible reconstruction.
from bs4 import BeautifulSoup as bs
import os
import sys
import re
import urllib2
import chardet
from qgb import U,T

# Default user id; replaced by the first command-line argument when given.
gsuser='******'
gszhihu='https://www.zhihu.com/'
gszp=gszhihu+'people/'
# Prefix / suffix of the comment-box URL; the answer id goes in between.
gscmt0='''https://www.zhihu.com/node/AnswerCommentBoxV2?params=%7B%22answer_id%22%3A%22'''
gscmt1='%22%2C%22load_all%22%3Atrue%7D'

# NOTE(review): `else` is assumed to pair with the argument-count check, so
# the usage line is shown whenever exactly one argument was not supplied.
if len(sys.argv) == 2:
    if sys.argv[1]:
        gsuser = sys.argv[1]
else:
    print('Usage: %s zhihuUserUrl or id' % sys.argv[0])

# Accept a full profile URL as well as a bare id.
if gszp in gsuser:
    gsuser = T.sub(gsuser, gszp, '').strip()

print('Processing ' + gsuser + ' ....')
# U.mkdir('zhihu')
# os.chdir('zhihu')
# U.mkdir(gsuser)
# os.chdir(gsuser)
i = 0
while True:
    # The unconditional break disables this loop; the statements after it
    # are unreachable and kept only as in the original.
    break
    i += 1
    fn = '%s.html' % i
    url = gszp + gsuser + ('/answers?page=' + str(i))
    # U.pln(url)
    # fh=open('%s.html'%i,'wb')
# NOTE(review): this line is a script fragment whose line breaks and
# indentation were lost, so it does not parse as written. Reading the
# statements in order:
#   * the leading `if ...: return False` / `return True` is the tail of a
#     predicate on `a` whose `def` is not visible here (presumably `isp`,
#     which is called further down -- confirm); it returns True only when
#     the last 9 characters of `a` are 'proxy.ini'.
#   * s127 / s000 hold the "[listen] ip = ..." text for 127.0.0.1 and
#     0.0.0.0. The whitespace between "[listen]" and "ip" was presumably a
#     newline originally -- verify against a real proxy.ini.
#   * rep(a) reads file `a`, replaces s127 with s000, writes the result back
#     to the same file and prints the first 44 characters.
#   * the trailing statements assign `s` to `sc` when `sc` is shorter than
#     11 characters, is None or does not contain 'proxy.ini', then split
#     `sc` on '"' and call rep() on each piece accepted by isp().
# NOTE(review): `len(sc)<11` is evaluated before `sc==None`, so a None `sc`
# raises TypeError before the None test is reached.
# TODO(review): restore the original formatting before running this.
if(a[-4-5:]!='proxy.ini'): return False return True s127='''[listen] ip = 127.0.0.1''' s000='''[listen] ip = 0.0.0.0''' def rep(a): f=open(a) s=f.read() f.close() s=s.replace(s127,s000) f=open(a,'w') f.write(s) f.close() print s[:44] sq=r'"' if(len(sc)<11 or sc==None or sc.find('proxy.ini')==-1): sc=s # print sc ls=[] sc0=T.sub(sc,sq,sq) if(len(sc0)>5): for i in sc.split(sq): print U.ct() if(isp(i)): print i,'\n','='*33 rep(i)
# Write a DOT digraph of comment replies: for every <li> of the parsed page,
# print one "author->target" edge when the comment header holds two URLs.
# NOTE(review): the original line breaks were lost; the nesting below is the
# most plausible reconstruction. Module-level names are kept unchanged
# because other parts of the script may read them.
sp = bs(s, "html.parser")
im = 0
ls = []
# Presumably redirects printed output to the .dot file until U.resetOut()
# is called -- confirm against qgb.U.
U.setOut('d:/test/dot/lby.dot')
print('digraph G {')
for i in sp.find_all('li'):
    im += 1
    # The per-item '_<n>' prefix was switched off in the original
    # (si was assigned and then immediately reset to '').
    si = ''
    aurl, burl = None, None
    for j in i.find_all(attrs={"class": "comment-hd"}):
        # Fixed: a stray "11" after this call made the line a syntax error.
        urls = re.findall(T.REURL, str(j), re.I)
        if 0 < len(urls) < 3:
            aurl = urls[0]
            # Explicit length check instead of a bare try/except around
            # urls[1]; an empty second match still leaves burl as None.
            if len(urls) > 1 and len(urls[1]) >= 1:
                burl = urls[1]
        # NOTE(review): assumed to sit at loop level, i.e. only the first
        # "comment-hd" element of each <li> is inspected -- confirm.
        break
    # NOTE(review): aurl is still None here when no usable header was found;
    # what T.sub does with None is not visible from this file.
    aurl = T.sub(aurl, '/people/')
    burl = T.sub(burl, '/people/')
    aurl = si + aurl.replace('-', '').replace('.', '')
    if burl is not None:
        burl = burl.replace('-', '').replace('.', '')
        print('%s->%s' % (aurl, burl))
    # else:
    #     print aurl
    # ls.append((aurl,burl))
print('}')
U.resetOut()