def getResult(self, key):
    """Return the IMDb results for the given key."""
    spider = ImdbSpider(key)
    result_queue = Queue()
    crawler = CrawlerWorker(spider, result_queue)
    crawler.start()
    results = result_queue.get()
    # Trim the list to at most self.maxResult entries.
    if len(results) > self.maxResult:
        del results[self.maxResult:]
    logging.debug('%s results', len(results))
    return results
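# None of the snippets on this page define the queue-based CrawlerWorker they
# share. A common sketch from the Scrapy 0.x era matches how it is called
# here: run the spider in a child process and hand the scraped items back
# through the queue. Every detail below is an assumption inferred from the
# call sites, not the original class:
from multiprocessing import Process
from scrapy import signals
from scrapy.crawler import Crawler
from scrapy.settings import Settings
from scrapy.xlib.pydispatch import dispatcher
from twisted.internet import reactor

class CrawlerWorker(Process):
    def __init__(self, spider, result_queue):
        Process.__init__(self)
        self.spider = spider
        self.result_queue = result_queue
        self.items = []
        # Collect each item as the crawl emits it.
        dispatcher.connect(self._item_scraped, signals.item_scraped)

    def _item_scraped(self, item):
        self.items.append(item)

    def run(self):
        crawler = Crawler(Settings())
        crawler.configure()
        # Stop the Twisted reactor once the spider finishes.
        crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
        crawler.crawl(self.spider)
        crawler.start()
        reactor.run()  # blocks until spider_closed fires
        self.result_queue.put(self.items)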
# Pick a spider for whichever marketplace the URL belongs to
# (the Auction spider is the default), then crawl the item page.
spider = AuctionViewItemPageSpider(startUrl=auctionUrl, itemno=itemno, kindOf="auction")
if "gmarket.co.kr" in startUrl:
    itemno = re.search(r"goodscode=[0-9]+", startUrl.lower()).group().replace("goodscode=", "")
    gmarketUrl = "http://mitem.gmarket.co.kr/Item?goodsCode=" + itemno
    spider = GmarketViewItemPageSpider(startUrl=gmarketUrl, itemno=itemno, kindOf="gmarket")
if "g9.co.kr" in startUrl:
    itemno = re.search(r"[0-9]+", startUrl).group()
    spider = G9ViewItemPageSpider(startUrl=startUrl.encode('utf-8'), itemno=itemno, kindOf="g9")
if "coupang.com" in startUrl:
    itemno = re.search(r"[0-9]+", startUrl).group()
    spider = CoupangViewItemPageSpider(startUrl=startUrl.encode('utf-8'), itemno=itemno, kindOf="coupang")
if "ticketmonster.co.kr" in startUrl:
    itemno = re.search(r"[0-9]+", startUrl).group()
    spider = TmonViewItemPageSpider(startUrl=startUrl.encode('utf-8'), itemno=itemno, kindOf="tmon")

# Run the chosen spider in a worker process and collect the scraped items.
resultQueue = Queue()
crawler = CrawlerWorker(spider, resultQueue)
crawler.start()
items = resultQueue.get()

# Serialize the first item and emit it as a CGI response.
body = {}
if len(items) > 0:
    body = json.dumps(items[0].__dict__.get('_values'))
print "Content-Type: application/json"
print "Length:", len(body)
print ""
print body
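# Design note: the script above is a CGI handler -- it prints a Content-Type
# header, a blank line, then the JSON body. items[0].__dict__.get('_values')
# reaches into scrapy.Item internals; since Item behaves like a mapping, the
# equivalent and more idiomatic spelling would be:
#     body = json.dumps(dict(items[0]))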
def process_new_config(config):
    worker = CrawlerWorker()
    worker.run(config)
import json
import uuid

from crawler_worker import CrawlerWorker

# Load the crawl configuration and tag it with a unique run id.
with open('irr_config.json', 'r') as json_data:
    config = json.loads(json_data.read())
config['id'] = str(uuid.uuid4())

worker = CrawlerWorker()
items = worker.run(config)
print len(items)
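# The last two snippets use a different, config-driven CrawlerWorker
# (imported from crawler_worker) whose definition is not shown. This minimal
# stub records only the interface the call sites imply -- treat all of it as
# an assumption:
class CrawlerWorker(object):
    def run(self, config):
        # `config` is a dict loaded from JSON (here with a generated 'id');
        # run() is expected to crawl accordingly and return the scraped items.
        items = []
        # ... perform the crawl described by `config` ...
        return items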