def getSeries(self, response):
    """Parse a brand page: for each factory/cartype pair, split the series
    links into halted ("停售") and on-sale names and insert one row per pair."""
    selector = Selector(response)
    record = {}
    record['brand'] = selector.xpath('//h2[@class="fn-left name"]/a/text()').extract()[0]
    conn = SimpleMysql(host=host, db=dbname, user=user, passwd=pswd)
    for factory_block in selector.xpath('//div[@class="carbradn-cont fn-clear"]/dl'):
        record['factory'] = factory_block.xpath('dt/a/text()').extract()[0]
        name_nodes = factory_block.xpath('dd/div[@class="list-dl-name"]')
        text_nodes = factory_block.xpath('dd/div[@class="list-dl-text"]')
        for idx, name_node in enumerate(name_nodes):
            record['cartype'] = name_node.xpath('text()').extract()[0].replace(u':', '')
            halted = []
            selling = []
            for series_name in text_nodes[idx].xpath('a/text()').extract():
                if u'停售' in series_name:
                    # Strip the "(停售)" marker from discontinued series.
                    halted.append(series_name.replace(u'(停售)', '').strip())
                else:
                    selling.append(series_name)
            record['halts'] = ','.join(halted)
            record['series'] = ','.join(selling)
            conn.insert(tablename, record)
            self.logger.info(record)
def __init__(self):
    """Open the MySQL handle for the nissan_group schema."""
    # autocommit flushes every insert immediately; no keep-alive reconnects.
    # NOTE(review): credentials are hard-coded — consider moving to config.
    settings = dict(
        host='127.0.0.1',
        charset='utf8',
        db='nissan_group',
        user='******',
        passwd='dndcadmin88*..',
        autocommit=True,
        keep_alive=False,
    )
    self.db = SimpleMysql(**settings)
def connect_mysql():
    """Bind a MySQL-backed SimpleMysql handle onto the Container singleton."""
    params = {
        "host": "localhost",
        "db": "platetype",
        "user": "******",
        "passwd": "plate",
        "keep_alive": True,  # try and reconnect timedout mysql connections?
    }
    Container._db = SimpleMysql(**params)
    Container._dbtype = 'mysql'
def parsePrice(self, response):
    """Parse an autohome dealer price page into AutohomeAllPriceItem rows.

    Dealer-level fields (city, dealer, dealerid, manu) are extracted once,
    then one item is produced per model row; rows are saved to MySQL when
    ISSAVE is set and POSTed to the API when ISPOST is set.
    """
    sel = Selector(response)
    trs = sel.xpath('//div[@class="carprice-cont"]/dl[@class="price-dl"]')
    item = AutohomeAllPriceItem()
    item['city'] = sel.xpath(
        '//div[@class="breadnav"]/a[2]/text()').extract()[0]
    item['dealer'] = sel.xpath(
        '//div[@class="text-main"]/text()').extract()[0]
    item['dealerid'] = sel.xpath(
        '//li[@id="nav_0"]/a/@href').extract()[0].replace('/', '')
    # Dealer's main brands, comma-joined (replaces manual '+=' and '[:-1]').
    item['manu'] = ','.join(
        t.xpath('p[@class="text"]/text()').extract()[0]
        for t in sel.xpath('//div[@class="brandtree-name"]'))
    log.msg(item['city'] + ', ' + item['dealer'] + ', ' + item['manu'])
    db = SimpleMysql(host='127.0.0.1:5029', db='wholenetwork',
                     user='******', passwd='')
    for tr in trs:
        item['brand'] = tr.xpath(
            'dt[@class="fn-clear"]/div[@class="name"]/p/a/text()').extract()[0]
        item['brandid'] = filt(
            tr.xpath('dt[@class="fn-clear"]/div[@class="name"]/p/a/@href'
                     ).extract()[0], 'cn/', '/')
        for price in tr.xpath('dd/table/tr'):
            tmp = price.xpath('td[2]/p/text()').extract()
            if not tmp:
                continue  # filt th row (header rows have no td[2]/p text)
            item['oprice'] = tmp[0].replace(u'万', '')
            tmp = price.xpath(
                'td[3]/div[@class="this-number red"]/a[1]/text()').extract()
            if not tmp:
                # Fallback layout: plain price cell without the red highlight.
                tmp = price.xpath('td[3]/p/a/text()').extract()
            item['price'] = tmp[0].replace(u'万', '').replace(u' ', '')
            item['pubdate'] = price.xpath(
                'td[5]/text()').extract()[0].replace(u' ', '').replace(
                    '\r\n', '')
            tmp = price.xpath('td[1]/a/text()').extract()[0]
            # FIX: str.find returns -1 when '<' is absent, and tmp[:-1]
            # silently dropped the last character of the model name.
            cut = tmp.find('<')
            item['model'] = tmp[:cut] if cut != -1 else tmp
            item['modelid'] = filt(
                price.xpath('td[1]/a/@href').extract()[0], 'spec_', '.')
            if ISSAVE:
                db.insert('autohome_allprice', item)
            if ISPOST:
                tmb = doPost(API_ADDRESS, item)
                log.msg('\t' + str(tmb['error']) + ', ' + tmb['msg'])
def parsePrice(self, response):
    """Parse a bitauto dealer price page; yields a Request for the next page.

    FIX: pagination used ``while``, but nothing inside the body changes the
    loop condition, so any page with a "下一页" (next page) link made this
    generator yield the same Request forever. ``if`` yields it exactly once.
    """
    sel = Selector(response)
    item = BitautoAllPriceItem()
    item['city'] = filt(
        sel.xpath('//div[@class="adress"]/text()').extract()[0],
        u'地址:', u'市')
    item['dealer'] = sel.xpath(
        '//div[@class="info"]/h1/text()').extract()[0]
    item['dealerid'] = filt(response.url, '.com/', '/')
    db = SimpleMysql(host='127.0.0.1:5029', db='wholenetwork',
                     user='******', passwd='')
    for tr in sel.xpath('//div[@class="car_list"]'):
        tmp = tr.xpath('div/div[@class="car_top"]/h3/a')
        item['brand'] = tmp.xpath('text()').extract()[0]
        item['brandid'] = filt(
            tmp.xpath('@href').extract()[0], 'cars_', '.html')
        for price in tr.xpath('div/div[@class="car_price"]/table/tbody/tr'):
            if not price.xpath('td'):
                continue  # filt th rows (header rows carry no td cells)
            item['model'] = price.xpath('td[1]/a/@title').extract()[0]
            item['modelid'] = filt(
                price.xpath('td[1]/a/@href').extract()[0],
                'price_detail/', '.html')
            item['oprice'] = price.xpath(
                'td[2]/text()').extract()[0].replace(u' ', '').replace(
                    '\r\n', '').replace(u'万', '')
            item['price'] = price.xpath(
                'td[4]/a/text()').extract()[0].replace('\r\n', '').replace(
                    u' ', '').replace(u'万', '')
            item['off'] = price.xpath(
                'td[3]/em/text()').extract()[0].replace('\r\n', '').replace(
                    u' ', '').replace(u'万', '').replace(u'↓', '')
            if ISSAVE:
                doSave(db, item)
            if ISPOST:
                doPost(API_ADDRESS, item)
    np = sel.xpath('//div[@id="pager"]/a')
    if np and (np[-1].xpath('text()').extract()[0] == u'下一页'):
        url = np[-1].xpath('@href').extract()[0]
        url = response.urljoin(url)
        yield Request(url, self.parsePrice)
def __init__(self, **keyVals):
    """Store the database parameters, open the connection, and ensure tables exist."""
    # saving database parameters
    self.dbParams = keyVals
    # table information
    self.tablePrefix = 'zipnish_'
    self.tables = ['spans', 'annotations']
    # connect to database (a missing key raises KeyError, as before)
    conn_args = {key: keyVals[key]
                 for key in ('host', 'db', 'user', 'passwd', 'keep_alive')}
    self.db = SimpleMysql(**conn_args)
    self.__create_tables()
def parsePrice(self, response):
    """Parse an autohome dealer price page and insert one row per model.

    Fixes: (1) the DB connection was re-opened inside the per-brand loop —
    it is now opened once per page; (2) the model-name truncation dropped
    the last character whenever no '<' marker was present in the cell text.
    """
    sel = Selector(response)
    item = AutohomeAllPriceItem()
    item['city'] = sel.xpath(
        '//div[@class="breadnav"]/a[2]/text()').extract()[0]
    item['dealer'] = sel.xpath(
        '//div[@class="text-main"]/text()').extract()[0]
    item['dealerid'] = sel.xpath(
        '//li[@id="nav_0"]/a/@href').extract()[0].replace('/', '')
    # Dealer's main brands, comma-joined (replaces manual '+=' and '[:-1]').
    item['manu'] = ','.join(
        t.xpath('p[@class="text"]/text()').extract()[0]
        for t in sel.xpath('//div[@class="brandtree-name"]'))
    self.logger.info(u'经销商:' + item['dealer'] + u',\t\t\t\t主营品牌:' + item['manu'])
    # One connection per page, not one per brand block as before.
    db = SimpleMysql(host=host, db=dbname, user=user, passwd=pswd)
    for tr in sel.xpath('//div[@class="carprice-cont"]/dl[@class="price-dl"]'):
        item['brand'] = tr.xpath(
            'dt[@class="fn-clear"]/div[@class="name"]/p/a/text()').extract()[0]
        item['brandid'] = filt(
            tr.xpath('dt[@class="fn-clear"]/div[@class="name"]/p/a/@href'
                     ).extract()[0], 'cn/', '/')
        item['cartype'] = tr.xpath(
            'dt/div[@class="info"]/p[2]/text()').extract()[0]
        for price in tr.xpath('dd/table/tr'):
            tmp = price.xpath('td[2]/p/text()').extract()
            if not tmp:
                continue  # filt th row (header rows have no td[2]/p text)
            item['oprice'] = tmp[0].replace(u'万', '')
            tmp = price.xpath(
                'td[3]/div[@class="this-number red"]/a/text()').extract()
            if not tmp:
                # Fallback layout: plain price cell without the red highlight.
                tmp = price.xpath('td[3]/p/a/text()').extract()
            item['price'] = tmp[0].replace(u'万', '').strip()
            tmp = price.xpath('td[1]/a/text()').extract()[0]
            # FIX: find() == -1 previously sliced off the final character.
            cut = tmp.find('<')
            item['model'] = tmp[:cut] if cut != -1 else tmp
            item['modelid'] = filt(
                price.xpath('td[1]/a/@href').extract()[0], 'spec_', '.')
            db.insert(tablename, item)
def __init__(self):
    """Open the MySQL connection using the [MySQL] section of the ini config."""
    cfg = ini.get_items('MySQL')
    self.db = SimpleMysql(
        host=cfg['host'],
        db=cfg['db'],
        user=cfg['user'],
        passwd=cfg['passwd'],
    )
def setUpClass(cls):
    """Back the shared SimpleMysql fixture with an in-memory SQLite database."""
    def connect():
        return sqlite3.connect(':memory:')
    cls.simplemysql = SimpleMysql(connect, DialectSQLite3())
def connect(host, db, user, passwd): print '[INFO]', 'Connecting to host', host, '...' tmp = SimpleMysql(host=host, db=db, user=user, passwd=passwd) if tmp: print '[INFO]', 'Connected to host', host, '.' return tmp
# -*- coding: utf-8 -*- import scrapy import re import json from scrapy.selector import Selector from scrapy.http import Request from simplemysql import SimpleMysql def regx(patern, string): regx = re.findall(re.compile(patern, re.IGNORECASE), string.strip()) return regx and regx[0] or None _db = SimpleMysql(host='127.0.0.1', db='autohome', user='******', passwd='root', autocommit=True) class CarSpider(scrapy.Spider): name = "car" allowed_domains = ['www.autohome.com.cn', 'k.autohome.com.cn'] start_urls = [ # 'http://www.autohome.com.cn/grade/carhtml/R.html', 'http://www.autohome.com.cn/grade/carhtml/'+C+'.html' for C in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'] ] def parse(self, response): # yield Request('http://k.autohome.com.cn/121/', self.parse_koubei) # return sel = Selector(response) brands = sel.xpath('//dl') item = {} for brand in brands: brand_name = brand.xpath('dt/div/a/text()').extract() item['brand_name'] = brand_name and brand_name[0] or None
#!/usr/bin/env python # coding: utf-8 import sys from simplemysql import SimpleMysql if __name__ == '__main__': reload(sys) sys.setdefaultencoding('utf-8') print sys.argv[1] db = SimpleMysql(host='127.0.0.1', user='******', passwd='root', db='databank') for i in open(sys.argv[1]): try: t = i.strip('\r\n').split('\t') r = {} r['username'] = t[0] r['password'] = t[1] r['email'] = t[2] db.insert('aipai', r) except Exception, e: continue db.commit()
def setUpClass(cls):
    """Create the SimpleMysql fixture shared by every test in this class."""
    # Default dialect: only the connection factory is supplied.
    cls.simplemysql = SimpleMysql(_connection_factory)
def u(s, encoding):
    # Coerce `s` to unicode, decoding byte strings with `encoding` (Python 2).
    if isinstance(s, unicode):
        return s
    return unicode(s, encoding)


def post(data):
    # POST the form-encoded `data` and return the raw response body.
    # NOTE(review): the URL is an empty string here — presumably stripped
    # before publication; fill in the real endpoint before running.
    f = urllib2.urlopen(
        '',
        urllib.urlencode(data)
    )
    return f.read()


if __name__ == '__main__':
    import sys
    # Python 2: force utf-8 default encoding for the .encode() calls below.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    conn = SimpleMysql(host="127.0.0.1", db='locoyspider', user='******',
                       passwd='root')
    # Fetch all rows; each result tuple is ordered like the column list.
    results = conn.getAll("data_content_153",
                          ['dealer', 'dealerid', 'modelid', 'model', 'price',
                           'oprice'])
    a = open('r.txt', 'w')
    for result in results:
        data = {}
        data['dealer'] = result[0].encode('utf-8')
        data['dealerid'] = result[1].encode('utf-8')
        data['modelid'] = result[2].encode('utf-8')
        data['model'] = result[3].encode('utf-8')
        data['price'] = result[4].encode('utf-8')
        data['oprice'] = result[5].encode('utf-8')
        # POST each row and log the raw response to r.txt.
        r = post(data)
        print r
        a.write(r + '\n')
    a.close()
# scrapy crawl autohomeallpromotion -s JOBDIR=autohomeallpromotion import sys, datetime, urllib, urllib2, json from scrapy.spider import BaseSpider from scrapy.selector import Selector from scrapy.http import Request from chebaba.items import AutohomeAllPromotionTitleItem from simplemysql import SimpleMysql from HTMLParser import HTMLParser ISSAVE = False ISPOST = False NISSAN_ONLY = False if ISSAVE: db = SimpleMysql(host='127.0.0.1:5029', db='wholenetwork', user='******', passwd='') def doSave(item): #return db.insert('autohome_allpromotiontitle', item) return db.insertOrUpdate('autohome_allpromotiontitle', item, ['titleid', 'pubdate']) def getBrands(array): if not array: return None brands = [] for a in array: if a: brands.append(a.extract())
def connect(host, db, user, passwd):
    """Open a SimpleMysql connection to `host`, logging progress.

    Returns the SimpleMysql handle, or None (implicitly) when the
    constructor yields a falsy object.
    """
    # Lazy %-style logging args instead of eager string concatenation: the
    # message is only formatted when INFO logging is enabled. The rendered
    # messages are byte-identical to the originals.
    logging.info('Connecting to host %s...', host)
    conn = SimpleMysql(host=host, db=db, user=user, passwd=passwd)
    if conn:
        logging.info('Connected to host %s.', host)
        return conn
def setUpClass(cls):
    """Wire the shared SimpleMysql fixture to the Postgres dialect."""
    dialect = DialectPostgres()
    cls.simplemysql = SimpleMysql(_connection_factory, dialect)