Example #1
    def getResult(self, key):
        """Return the IMDb results for the given key."""
        spider = ImdbSpider(key)
        result_queue = Queue()
        # run the spider in a separate worker process and wait for its items
        crawler = CrawlerWorker(spider, result_queue)
        crawler.start()
        results = result_queue.get()

        # keep at most self.maxResult entries
        if len(results) > self.maxResult:
            del results[self.maxResult:]
        logging.debug('%s results', len(results))
        return results
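
The CrawlerWorker used above is not shown on this page. For reference, here is a minimal sketch of the usual pattern, assuming the classic Scrapy 0.x recipe: a multiprocessing.Process that runs one crawl behind the Twisted reactor and hands the collected items back through the queue. The class and signal names below follow that old API and are an assumption, not necessarily this project's exact code.

from multiprocessing import Process
from twisted.internet import reactor
from scrapy import signals
from scrapy.crawler import Crawler
from scrapy.utils.project import get_project_settings

class CrawlerWorker(Process):
    """Run one spider in a child process; put the scraped items on a queue."""

    def __init__(self, spider, result_queue):
        Process.__init__(self)
        self.spider = spider
        self.result_queue = result_queue
        self.items = []

    def _item_scraped(self, item, response, spider):
        # called by Scrapy for every item the spider yields
        self.items.append(item)

    def run(self):
        # old-style (Scrapy 0.x) API: configure a Crawler, listen for
        # item_scraped, and drive the crawl with the Twisted reactor
        crawler = Crawler(get_project_settings())
        crawler.signals.connect(self._item_scraped, signal=signals.item_scraped)
        crawler.configure()
        crawler.crawl(self.spider)
        crawler.start()
        reactor.run()
        self.result_queue.put(self.items)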
Example #2
if "auction.co.kr" in startUrl:
  # assumed condition; the itemno/auctionUrl setup for this branch is truncated in the source
  spider = AuctionViewItemPageSpider(startUrl=auctionUrl, itemno=itemno, kindOf="auction")

if "gmarket.co.kr" in startUrl:
  itemno = re.search(r"goodscode=[0-9]+",startUrl.lower()).group().replace("goodscode=", "")
  gmarketUrl = "http://mitem.gmarket.co.kr/Item?goodsCode=" + itemno
  spider = GmarketViewItemPageSpider(startUrl = gmarketUrl, itemno = itemno, kindOf="gmarket")

if "g9.co.kr" in startUrl:
  itemno = re.search(r"[0-9]+",startUrl).group()
  spider = G9ViewItemPageSpider(startUrl = startUrl.encode('utf-8'), itemno = itemno, kindOf="g9")

if "coupang.com" in startUrl:
  itemno = re.search(r"[0-9]+",startUrl).group()
  spider = CoupangViewItemPageSpider(startUrl = startUrl.encode('utf-8'), itemno = itemno, kindOf="coupang")

if "ticketmonster.co.kr" in startUrl:
  itemno = re.search(r"[0-9]+",startUrl).group()
  spider = TmonViewItemPageSpider(startUrl = startUrl.encode('utf-8'), itemno = itemno, kindOf="tmon")

resultQueue = Queue()
crawler = CrawlerWorker(spider, resultQueue)
crawler.start()
items = resultQueue.get()
body = "{}"  # default JSON body so Content-Length matches what is printed
if len(items) > 0:
  # serialize the first scraped item; this reaches into Scrapy Item internals
  body = json.dumps(items[0].__dict__.get('_values'))

# CGI-style response: headers, a blank line, then the JSON body (Python 2 print)
print "Content-Type: application/json"
print "Content-Length:", len(body)
print ""
print body
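
One note on the serialization line above: Scrapy's Item implements the mapping protocol, so the supported way to get the field dict is dict(item) rather than reading the private _values attribute:

body = json.dumps(dict(items[0]))  # same output, without touching Item internals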
Example #3

def process_new_config(config):
    worker = CrawlerWorker()
    worker.run(config)
Example #4

import json
import uuid
from crawler_worker import CrawlerWorker


with open('irr_config.json', 'r') as json_data:
    config = json.loads(json_data.read())
    config['id'] = str(uuid.uuid4())  # tag this crawl run with a fresh UUID

worker = CrawlerWorker()
items = worker.run(config)

print len(items)
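
Note that the CrawlerWorker imported from crawler_worker in the last two examples has a different interface from the queue-based one above: a no-argument constructor and a run(config) method that returns the scraped items. The module itself is not shown on this page, so only its rough shape can be inferred:

# hypothetical stub inferred from the calls above, not the module's actual code
class CrawlerWorker(object):
    def run(self, config):
        items = []
        # ... crawl according to the config dict, appending scraped items ...
        return items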