Example #1
	def getSeries(self, response):
		sel = Selector(response)

		item = {}
		item['brand'] = sel.xpath('//h2[@class="fn-left name"]/a/text()').extract()[0]

		db = SimpleMysql(host = host, db = dbname, user = user, passwd = pswd)
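		# each <dl> block is one factory; its <dd> pairs a car-type heading with that type's series list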
		fs = sel.xpath('//div[@class="carbradn-cont fn-clear"]/dl')
		for f in fs:
			item['factory'] = f.xpath('dt/a/text()').extract()[0]
			ts = f.xpath('dd/div[@class="list-dl-name"]')
			ss = f.xpath('dd/div[@class="list-dl-text"]')

			for i in range(len(ts)):
				item['cartype'] = ts[i].xpath('text()').extract()[0].replace(u':', '')
				temp1 = ''
				temp2 = ''
				qs = ss[i].xpath('a/text()').extract()
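				# series marked '(停售)' (discontinued) go into 'halts'; the rest into 'series'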
				for q in qs:
					if u'停售' in q:
						temp1 += q.replace(u'(停售)', '').strip() + ','
					else:
						temp2 += q + ','
				item['halts'] = temp1[:-1]
				item['series'] = temp2[:-1]
				db.insert(tablename, item)
				self.logger.info(item)
Example #2
 def __init__(self):
     self.db = SimpleMysql(host='127.0.0.1',
                           charset='utf8',
                           db='nissan_group',
                           user='******',
                           passwd='dndcadmin88*..',
                           autocommit=True,
                           keep_alive=False)
Example #3
 def connect_mysql():
     Container._db = SimpleMysql(
         host="localhost",
         db="platetype",
         user="******",
         passwd="plate",
         keep_alive=True  # try to reconnect timed-out MySQL connections
     )
     Container._dbtype = 'mysql'
Example #4
    def parsePrice(self, response):
        sel = Selector(response)
        trs = sel.xpath('//div[@class="carprice-cont"]/dl[@class="price-dl"]')
        item = AutohomeAllPriceItem()
        item['city'] = sel.xpath(
            '//div[@class="breadnav"]/a[2]/text()').extract()[0]
        item['dealer'] = sel.xpath(
            '//div[@class="text-main"]/text()').extract()[0]
        item['dealerid'] = sel.xpath(
            '//li[@id="nav_0"]/a/@href').extract()[0].replace('/', '')
        tmp = sel.xpath('//div[@class="brandtree-name"]')
        tmps = ''
        for t in tmp:
            tmps += t.xpath('p[@class="text"]/text()').extract()[0] + ','
        item['manu'] = tmps[:-1]
        log.msg(item['city'] + ', ' + item['dealer'] + ', ' + item['manu'])

        db = SimpleMysql(host='127.0.0.1:5029',
                         db='wholenetwork',
                         user='******',
                         passwd='')
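        # each price-dl block yields one 'brand'; the rows of its table hold per-model original and quoted prices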
        for tr in trs:
            item['brand'] = tr.xpath(
                'dt[@class="fn-clear"]/div[@class="name"]/p/a/text()').extract(
                )[0]
            item['brandid'] = filt(
                tr.xpath('dt[@class="fn-clear"]/div[@class="name"]/p/a/@href').
                extract()[0], 'cn/', '/')

            prices = tr.xpath('dd/table/tr')
            for price in prices:
                tmp = price.xpath('td[2]/p/text()').extract()
                if not tmp: continue  # skip the header (th) row
                else: item['oprice'] = tmp[0]
                item['oprice'] = item['oprice'].replace(u'万', '')
                tmp = price.xpath(
                    'td[3]/div[@class="this-number red"]/a[1]/text()').extract(
                    )
                if not tmp: tmp = price.xpath('td[3]/p/a/text()').extract()
                item['price'] = tmp[0].replace(u'万', '').replace(u' ', '')
                item['pubdate'] = price.xpath(
                    'td[5]/text()').extract()[0].replace(u' ', '').replace(
                        '\r\n', '')
                tmp = price.xpath('td[1]/a/text()').extract()[0]
                item['model'] = tmp[:tmp.find('<')] if '<' in tmp else tmp  # guard: find() returns -1 when '<' is absent
                item['modelid'] = filt(
                    price.xpath('td[1]/a/@href').extract()[0], 'spec_', '.')

                if ISSAVE: db.insert('autohome_allprice', item)

                if ISPOST:
                    tmb = doPost(API_ADDRESS, item)
                    log.msg('\t' + str(tmb['error']) + ', ' + tmb['msg'])
Example #5
    def parsePrice(self, response):
        sel = Selector(response)

        item = BitautoAllPriceItem()
        item['city'] = filt(
            sel.xpath('//div[@class="adress"]/text()').extract()[0], u'地址:',
            u'市')
        item['dealer'] = sel.xpath(
            '//div[@class="info"]/h1/text()').extract()[0]
        item['dealerid'] = filt(response.url, '.com/', '/')

        db = SimpleMysql(host='127.0.0.1:5029',
                         db='wholenetwork',
                         user='******',
                         passwd='')
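        # each car_list block yields one 'brand'; its table rows hold per-model guide and quoted prices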
        trs = sel.xpath('//div[@class="car_list"]')
        for tr in trs:
            tmp = tr.xpath('div/div[@class="car_top"]/h3/a')
            item['brand'] = tmp.xpath('text()').extract()[0]
            item['brandid'] = filt(
                tmp.xpath('@href').extract()[0], 'cars_', '.html')
            prices = tr.xpath('div/div[@class="car_price"]/table/tbody/tr')
            for price in prices:
            if not price.xpath('td'): continue  # skip header (th) rows
                item['model'] = price.xpath('td[1]/a/@title').extract()[0]
                item['modelid'] = filt(
                    price.xpath('td[1]/a/@href').extract()[0], 'price_detail/',
                    '.html')
                item['oprice'] = price.xpath(
                    'td[2]/text()').extract()[0].replace(u' ', '').replace(
                        '\r\n', '').replace(u'万', '')
                item['price'] = price.xpath(
                    'td[4]/a/text()').extract()[0].replace('\r\n', '').replace(
                        u' ', '').replace(u'万', '')
                item['off'] = price.xpath(
                    'td[3]/em/text()').extract()[0].replace(
                        '\r\n', '').replace(u' ',
                                            '').replace(u'万',
                                                        '').replace(u'↓', '')

                if ISSAVE: doSave(db, item)
                if ISPOST: doPost(API_ADDRESS, item)

        np = sel.xpath('//div[@id="pager"]/a')
        # follow the '下一页' (next page) link, if present
        if np and (np[-1].xpath('text()').extract()[0] == u'下一页'):
            url = np[-1].xpath('@href').extract()[0]
            url = response.urljoin(url)
            yield Request(url, self.parsePrice)
Example #6
File: db.py Project: kregor/zipnish
    def __init__(self, **keyVals):
        # saving database parameters
        self.dbParams = keyVals

        # table information
        self.tablePrefix = 'zipnish_'
        self.tables = ['spans', 'annotations']

        # connect to database
        self.db = SimpleMysql(host=keyVals['host'],
                              db=keyVals['db'],
                              user=keyVals['user'],
                              passwd=keyVals['passwd'],
                              keep_alive=keyVals['keep_alive'])

        self.__create_tables()
Example #7
    def parsePrice(self, response):
        sel = Selector(response)

        item = AutohomeAllPriceItem()
        item['city'] = sel.xpath('//div[@class="breadnav"]/a[2]/text()').extract()[0]
        item['dealer'] = sel.xpath('//div[@class="text-main"]/text()').extract()[0]
        item['dealerid'] = sel.xpath('//li[@id="nav_0"]/a/@href').extract()[0].replace('/', '')
        tmp = sel.xpath('//div[@class="brandtree-name"]')

        tmps = ''
        for t in tmp: tmps += t.xpath('p[@class="text"]/text()').extract()[0] + ','
        item['manu'] = tmps[:-1]
        self.logger.info(u'经销商:' + item['dealer'] + u',\t\t\t\t主营品牌:' + item['manu'])

        trs = sel.xpath('//div[@class="carprice-cont"]/dl[@class="price-dl"]')
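        # each price-dl block yields one 'brand' and 'cartype'; its table rows hold per-model prices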
        for tr in trs:
            item['brand'] = tr.xpath('dt[@class="fn-clear"]/div[@class="name"]/p/a/text()').extract()[0]
            item['brandid'] = filt(tr.xpath('dt[@class="fn-clear"]/div[@class="name"]/p/a/@href').extract()[0], 'cn/', '/')
            item['cartype'] = tr.xpath('dt/div[@class="info"]/p[2]/text()').extract()[0]

            db = SimpleMysql(host = host, db = dbname, user = user, passwd = pswd)
            prices = tr.xpath('dd/table/tr')
            for price in prices:
                tmp = price.xpath('td[2]/p/text()').extract()
                if not tmp: continue  # skip the header (th) row
                item['oprice'] = tmp[0].replace(u'万','')

                tmp = price.xpath('td[3]/div[@class="this-number red"]/a/text()').extract()
                if not tmp: tmp = price.xpath('td[3]/p/a/text()').extract()
                item['price'] = tmp[0].replace(u'万','').strip()

                tmp = price.xpath('td[1]/a/text()').extract()[0]
                item['model'] = tmp[:tmp.find('<')] if '<' in tmp else tmp  # guard: find() returns -1 when '<' is absent

                item['modelid'] = filt(price.xpath('td[1]/a/@href').extract()[0], 'spec_', '.')

                db.insert(tablename, item)
Example #8
 def __init__(self):
     dbc = ini.get_items('MySQL')
     self.db = SimpleMysql(host=dbc['host'],
                           db=dbc['db'],
                           user=dbc['user'],
                           passwd=dbc['passwd'])
Example #9
 def setUpClass(cls):
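     # test fixture: run SimpleMysql against an in-memory SQLite connection with the SQLite dialect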
     cls.simplemysql = SimpleMysql(lambda: sqlite3.connect(':memory:'),
                                   DialectSQLite3())
Example #10
def connect(host, db, user, passwd):
    print '[INFO]', 'Connecting to host', host, '...'
    tmp = SimpleMysql(host=host, db=db, user=user, passwd=passwd)
    if tmp: print '[INFO]', 'Connected to host', host, '.'
    return tmp
Example #11
# -*- coding: utf-8 -*-
import scrapy
import re
import json
from scrapy.selector import Selector
from scrapy.http import Request
from simplemysql import SimpleMysql

def regx(patern, string):
    regx = re.findall(re.compile(patern, re.IGNORECASE), string.strip())
    return regx and regx[0] or None

# module-level MySQL connection shared by all spider callbacks
_db = SimpleMysql(host='127.0.0.1', db='autohome', user='******', passwd='root', autocommit=True)

class CarSpider(scrapy.Spider):
    name = "car"
    allowed_domains = ['www.autohome.com.cn', 'k.autohome.com.cn']
    start_urls = [
        # 'http://www.autohome.com.cn/grade/carhtml/R.html',
        'http://www.autohome.com.cn/grade/carhtml/'+C+'.html' for C in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
    ]

    def parse(self, response):
        # yield Request('http://k.autohome.com.cn/121/', self.parse_koubei)
        # return
        sel = Selector(response)
        brands = sel.xpath('//dl')
        item = {}
        for brand in brands:
            brand_name = brand.xpath('dt/div/a/text()').extract()
            item['brand_name'] = brand_name and brand_name[0] or None
Example #12
#!/usr/bin/env python
# coding: utf-8

import sys
from simplemysql import SimpleMysql

if __name__ == '__main__':
    reload(sys)
    sys.setdefaultencoding('utf-8')
    print sys.argv[1]
    db = SimpleMysql(host='127.0.0.1',
                     user='******',
                     passwd='root',
                     db='databank')
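    # each input line is tab-separated: username, password, email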
    for i in open(sys.argv[1]):
        try:
            t = i.strip('\r\n').split('\t')
            r = {}
            r['username'] = t[0]
            r['password'] = t[1]
            r['email'] = t[2]
            db.insert('aipai', r)
        except Exception, e:
            continue
    db.commit()
Example #13
 def setUpClass(cls):
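     # test fixture: build SimpleMysql from a connection factory defined elsewhere in the test module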
     cls.simplemysql = SimpleMysql(_connection_factory)
Example #14
import urllib
import urllib2
from simplemysql import SimpleMysql

def u(s, encoding):
	if isinstance(s, unicode): return s
	return unicode(s, encoding)

def post(data):
	f = urllib2.urlopen(
		'',
		urllib.urlencode(data)
	)
	return f.read()

if __name__ == '__main__':
	import sys
	reload(sys)
	sys.setdefaultencoding('utf-8')
	conn = SimpleMysql(host="127.0.0.1", db='locoyspider', user='******', passwd='root')
	results = conn.getAll("data_content_153", ['dealer', 'dealerid', 'modelid', 'model', 'price', 'oprice'])

	a = open('r.txt', 'w')
	for result in results:
		data = {}
		data['dealer'] = result[0].encode('utf-8')
		data['dealerid'] = result[1].encode('utf-8')
		data['modelid'] = result[2].encode('utf-8')
		data['model'] = result[3].encode('utf-8')
		data['price'] = result[4].encode('utf-8')
		data['oprice'] = result[5].encode('utf-8')
		r = post(data)
		print r
		a.write(r + '\n')
	a.close()
Example #15
# scrapy crawl autohomeallpromotion -s JOBDIR=autohomeallpromotion

import sys, datetime, urllib, urllib2, json
from scrapy.spider import BaseSpider
from scrapy.selector import Selector
from scrapy.http import Request
from chebaba.items import AutohomeAllPromotionTitleItem
from simplemysql import SimpleMysql
from HTMLParser import HTMLParser

ISSAVE = False  # when True, open the MySQL connection below and save items via doSave
ISPOST = False  # when True, post items to the remote API
NISSAN_ONLY = False
if ISSAVE:
    db = SimpleMysql(host='127.0.0.1:5029',
                     db='wholenetwork',
                     user='******',
                     passwd='')


def doSave(item):
    #return db.insert('autohome_allpromotiontitle', item)
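    # insert-or-update so a re-crawled promotion updates its existing row instead of duplicating it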
    return db.insertOrUpdate('autohome_allpromotiontitle', item,
                             ['titleid', 'pubdate'])


def getBrands(array):
    if not array: return None
    brands = []
    for a in array:
        if a:
            brands.append(a.extract())
Example #16
def connect(host, db, user, passwd):
    logging.info('Connecting to host ' + host + '...')
    tmp = SimpleMysql(host=host, db=db, user=user, passwd=passwd)
    if tmp: logging.info('Connected to host ' + host + '.')
    return tmp
Example #17
 def setUpClass(cls):
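     # test fixture: same connection factory as above, but with the Postgres dialect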
     cls.simplemysql = SimpleMysql(_connection_factory, DialectPostgres())