def process_item(self, item, spider):
    """Store the response carried by ``item``, dispatching on the item's class.

    * ``listItem``: the response is inserted into the map-response table
      through this object's own cursor and committed; ``None`` is returned.
    * ``calendarItem``: house id and response are passed to
      ``dbCalendarInsert``; the item is returned.
    * ``detailItem``: house id and response are passed to
      ``dbDetailInsert``; the item is returned.
    * any other class: the item is returned without being stored.

    ``spider`` is accepted but not used.

    NOTE(review): a new connection is opened on every call and is not
    closed anywhere in this method - confirm that this is intended.
    """
    self.table = "`proxypool_us`"
    self.db = dbSettings.db_connect()
    self.cursor = self.db.cursor()
    self.mapTable, self.listTable = "`map_us`", "`houselist_us`"
    self.mapresponseTable = "`mapresponse_us`"
    self.calendarResponseTable = "`calendarresponse_us`"

    item_class = item.__class__

    if item_class == listItem:
        # Timestamp is captured but not read anywhere below.
        st = time.time()
        payload = item['response']
        statement = "INSERT INTO {} VALUES (NULL ,%s)".format(
            self.mapresponseTable)
        self.cursor.execute(statement, (payload,))
        self.db.commit()
        return None

    if item_class == calendarItem:
        house_id, payload = item['house_id'], item['response']
        dbCalendarInsert(house_id, payload)
        print("house id: ", house_id, "len of response: ", len(payload))
    elif item_class == detailItem:
        house_id = item['house_id']
        dbDetailInsert(house_id, item['response'])
        print(house_id)

    return item
def process_item(self, item, spider):
    """Store the response carried by ``item``, dispatching on the item's class.

    * ``listItem``: the raw response string is inserted into the
      map-response table through this object's own cursor and committed;
      ``None`` is returned. The response is stored as-is; it is not decoded
      here.
    * ``calendarItem``: the insert is handed to ``dbCalendarInsert`` on a
      newly started thread, the house id is printed, and the item is
      returned.
    * any other class: the item is returned without being stored.

    ``spider`` is accepted but not used.

    NOTE(review): a new connection is opened on every call and is not
    closed anywhere in this method - confirm that this is intended.
    """
    self.table = "`proxypool`"
    self.db = dbSettings.db_connect()
    self.cursor = self.db.cursor()
    self.mapTable = "`map`"
    self.listTable = "`houselist`"
    self.mapresponseTable = "`mapresponse`"
    self.calendarResponseTable = "`calendarresponse`"

    if item.__class__ == listItem:
        # Bind the response as a query parameter instead of splicing it into
        # the SQL text: the driver does the escaping, so quotes and
        # backslashes in the payload are stored verbatim and cannot break
        # (or inject into) the statement.
        sql = "INSERT INTO " + self.mapresponseTable + " VALUES (NULL ,%s)"
        self.cursor.execute(sql, (item['response'],))
        self.db.commit()
        return
    elif item.__class__ == calendarItem:
        # The calendar insert opens its own connection, so it can run on a
        # separate thread without sharing this object's cursor.
        th = threading.Thread(
            target=dbCalendarInsert,
            args=(item['house_id'], item['response']),
        )
        th.start()
        print(item['house_id'])
    return item
def dbCalendarInsert(house_id, response):
    """Insert one calendar response row into ``calendarresponse``.

    Opens its own connection, runs a single ``INSERT IGNORE`` and commits.
    The connection is closed even when the insert or commit raises; the
    exception still propagates to the caller.

    Args:
        house_id: value stored in the ``house_id`` column.
        response: value stored in the ``response`` column.
    """
    calendarResponseTable = "`calendarresponse`"
    # Values are bound as parameters so that quotes or backslashes in the
    # response cannot break or alter the statement.
    sql = ("INSERT IGNORE INTO " + calendarResponseTable
           + " (id, house_id, response) VALUES (NULL,%s,%s)")
    db = dbSettings.db_connect()
    try:
        cursor = db.cursor()
        cursor.execute(sql, (house_id, response))
        db.commit()
    finally:
        db.close()
def dbCalendarInsert(house_id, response):
    """Insert one calendar response row into ``calendarresponse_us``.

    Opens its own connection, runs a single parameterized ``INSERT`` and
    commits. The connection is closed even when the insert or commit
    raises; the exception still propagates to the caller.

    Args:
        house_id: value stored in the ``house_id`` column.
        response: value stored in the ``response`` column.
    """
    calendarResponseTable = "`calendarresponse_us`"
    sql = ("INSERT INTO " + calendarResponseTable
           + " (id, house_id, response) VALUES (NULL,%s,%s)")
    db = dbSettings.db_connect()
    try:
        cursor = db.cursor()
        cursor.execute(sql, (house_id, response))
        db.commit()
    finally:
        # Release the connection on failure as well as on success.
        db.close()
def dbDetailInsert(house_id, response):
    """Insert one detail response row into ``detailresponse``.

    Opens its own connection, runs a single parameterized ``INSERT`` and
    commits. The connection is closed even when the insert or commit
    raises; the exception still propagates to the caller.

    Args:
        house_id: value stored in the ``house_id`` column.
        response: value stored in the ``response`` column.
    """
    detailResponseTable = "`detailresponse`"
    # No manual quote escaping is needed: both values are bound as
    # parameters and escaped by the driver.
    sql = ("INSERT INTO " + detailResponseTable
           + " (id, house_id, response) VALUES (NULL,%s,%s)")
    db = dbSettings.db_connect()
    try:
        cursor = db.cursor()
        cursor.execute(sql, (house_id, response))
        db.commit()
    finally:
        # Release the connection on failure as well as on success.
        db.close()
def __init__(self):
    """Give every field its blank starting value and open a DB connection.

    Numeric bounds start at 0.0, ``id`` at 0, ``num`` at -1 and all text
    fields as empty strings. A connection and cursor are obtained from
    ``dbSettings.db_connect()``, the table name is set to ``map``, and
    the construction time is recorded in ``starttime``.
    """
    # Lower/upper bounds - presumably latitude and longitude limits
    # (verify against the code that fills them in).
    self.lat_low = self.lat_upp = 0.0
    self.lon_low = self.lon_upp = 0.0
    self.id, self.num = 0, -1

    connection = dbSettings.db_connect()
    self.db = connection
    self.cursor = connection.cursor()

    # Text fields all start out empty.
    self.area = self.errInfo = self.url = self.json = self.html = ""
    self.table = "`map`"
    # Taken last, i.e. after the connection has been established.
    self.starttime = time.time()
def __init__(self):
    """Start with an empty proxy record and open a DB connection.

    ``proxyId`` starts at 0 and ``ip`` as an empty string; the table name
    is set to ``proxypool``. A connection and cursor are obtained from
    ``dbSettings.db_connect()``.
    """
    self.proxyId, self.ip = 0, ""
    self.table = "`proxypool`"

    connection = dbSettings.db_connect()
    self.db = connection
    self.cursor = connection.cursor()