Beispiel #1
0
    '%d0%b3%d1%80%d1%8b%d0%bd%d1%8c',                                     # Gryn`          178 OKc
    '%D0%BB%D0%B5%D1%89%D1%83%D0%BA',                                     # Leschuk       3929 OKc
    '%D0%9A%D0%B0%D0%BC%D0%BB%D0%B8%D1%87%D0%B5%D0%BD%D0%BA%D0%BE',       # Kamlichenko      5 OKc
    '%D0%9A%D0%BE%D0%BC%D0%BB%D0%B8%D1%87%D0%B5%D0%BD%D0%BA%D0%BE',       # Komlichenko    440 OKc
    '%D0%9A%D0%B0%D0%BB%D0%B5%D0%BD%D0%B8%D1%87%D0%B5%D0%BD%D0%BA%D0%BE', # Kalenichenko  2019 OKc
    '%D0%9A%D0%B0%D0%BB%D0%B8%D0%BD%D0%B8%D1%87%D0%B5%D0%BD%D0%BA%D0%BE'] # Kalinichenko 20k
    # Nadijka, Nadejka: 0 results; Abazovka: the site has no search by settlement (locality)
    # Moroz Vadim: 200 people; Andr 994; Vlad 2400; Pavel 400; Mixail 1200; Maks 200; Roman 407; Anat 1208; Evg 401; Taisia 71; Lubov 1001; Tatyana 2000; Dmit 700

  # Download the paginated nomerorg.com listing for one surname and store the
  # rows of every result page in its own CSV file under dir0.
  #
  # Raises IndexError (from proxy_list[k_proxy]) once every proxy has been
  # tried; the per-page file is closed by the `with` block in that case too.
  k0=1  # index into name_list of the percent-encoded surname to fetch
  link0='http://nomerorg.com/allukraina/lastName_'+name_list[k0]+'_pagenumber_'
  # Upper bound (exclusive) of the page range; presumably 65645 records at
  # 15 rows per page -- TODO confirm against the site.
  nm=1+65645//15
  k_proxy=0
  u.install_opener(u.build_opener(u.ProxyHandler({'http': proxy_list[k_proxy]})))
  for k in range(877,nm):  # NOTE(review): starts at page 877, presumably resuming an earlier run
    link1=link0+str(k)+'.html'
    flag1=False
    # The payload written below is already-encoded UTF-16 bytes, so the file
    # is opened in binary mode: text mode would apply newline translation on
    # some platforms and corrupt the two-byte code units.
    # 10000+k keeps the page number at a fixed width inside the file name.
    with open(dir0+'name'+str(k0)+'_'+str(10000+k)+'_.csv','wb') as fd:
      print('%s %s' % (k,k_proxy))
      # Retry until a page is fetched and parsed into more than one fragment.
      # Any failure (timeout, refused connection, parse error) switches to the
      # next proxy; a short page without an exception is retried on the same
      # proxy.
      while not flag1:
        try:
          url1=u.urlopen(u.Request(link1,headers=url_hdr),timeout=3)
          page1=html.fragments_fromstring(url1.read())
          flag1=(1<len(page1))
        except Exception:
          # `Exception` rather than a bare except so that KeyboardInterrupt
          # and SystemExit still stop the scraper.
          k_proxy+=1
          u.install_opener(u.build_opener(u.ProxyHandler({'http': proxy_list[k_proxy]})))
          print('%s %s' % (k,k_proxy))
      # The fourth top-level fragment is serialized and treated as the result
      # table. NOTE(review): the loop above only guarantees two fragments, so
      # page1[3] can still raise IndexError -- confirm the page layout.
      o=html.tostring(page1[3])
      # Cut 10 characters before the first '</table>' (presumably the closing
      # markup of the last row -- TODO confirm), then turn cell boundaries
      # into commas and row boundaries into newlines.
      o=o[:o.find('</table>')-10].replace('</td><td>',',').replace('</td></tr><tr><td>','\n')
      # Drop everything up to and including the last header row; 17 is the
      # length of the marker string being searched for.
      o=o[o.rfind('/th></tr><tr><td>')+17:]+'\n'
      # Undo percent-encoding first, then HTML entity escaping.
      o=HTMLParser().unescape(u.unquote(o))
      # Stop the whole scrape when the extracted text contains ad markup;
      # presumably that means the page carried no result rows -- TODO confirm.
      # As before, the (empty) file for this page is left on disk.
      if -1<o.find('adsbygoogle'):
        break
      fd.write(o.encode('utf-16'))
      time.sleep(.1)  # short pause between consecutive page requests
  # Forbidden (blocked py queries)/ connection refused (try again)
  #--- consolidate files
  # Concatenate the per-page CSV files of surname number k into one file,
  # dir0+'name<k>.csv', copying the bytes unchanged.
  k=1
  # glob.glob() returns matches in arbitrary order; sorting makes the merge
  # deterministic, and because the page number in the file name has a fixed
  # width (10000+page) the lexicographic order is also the page order.
  # NOTE(review): the pages are read from the dir0+'name1/' subdirectory --
  # confirm the per-page files are placed there before this step runs.
  o1=sorted(glob.glob(dir0+'name1/name'+str(k)+'_1*.csv'))
  # NOTE(review): if every per-page file was produced with the 'utf-16' codec
  # it starts with its own byte-order mark, so the merged file would contain
  # one BOM per page -- confirm the consumer tolerates that.
  with open(dir0+'name'+str(k)+'.csv','wb') as fd:
    for q in o1:
      with open(q,'rb') as fd_in:
        o=fd_in.read()
      fd.write(o)