def crawYiCaiStockDailyNews(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextArray = browsor.find_elements_by_tag_name('dl')
    for context in contextArray:
        try:
            titleValue = context.find_element_by_tag_name('h1')
            descriptContext = context.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            linkUrl = context.find_element_by_tag_name('a').get_attribute(
                'href')
            try:
                imageObj = context.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException, e:
                imageUrl = CommonsInitValue.initTempImage()

        except NoSuchElementException, e:
            continue
        title = titleValue.text
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'STOCK', 'YICAINET'
        ])
    return currentArray
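
The snippets in this file all share the same scaffolding; the sketch below lists the imports they rely on (CommonsInitValue and the various *Spider modules are project-local helpers, so the commented import path is only an assumption).

# Shared-import sketch for these examples; selenium, uuid and time are the
# only stdlib/third-party names actually referenced.
import time
import uuid

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# from commons import CommonsInitValue  # assumed path for the project helper
# Note: none of the snippets call browsor.quit(), so callers reusing this code
# should close the PhantomJS process themselves.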
Example #2
def crawYCFinanceHLDataSource(link):
    listArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    courrentContext = browsor.find_elements_by_tag_name('dl')

    for currentDiv in courrentContext:
        try:
            titleObj = currentDiv.find_element_by_tag_name('h1')
            title = titleObj.text
            linkUrl = titleObj.find_element_by_tag_name('a').get_attribute(
                'href')
            descriptContext = currentDiv.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            try:
                imageObj = currentDiv.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException, e:
                imageUrl = CommonsInitValue.initTempImage()
        except NoSuchElementException, e:
            continue
        listArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'YCNET'
        ])
    return listArray
Example #3
def crawMorningOilDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name('news_list_all').find_elements_by_tag_name('li')
    for context in maincontext:
        imageUrl = CommonsInitValue.initoiltempimage()
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'OIL','XIDU'])
    return currentArray
Example #4
def crawZBNewsNetDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('l_title')
    for context in contextList:
        pubDate = CommonsInitValue.initNowTime()
        try:
            imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        except NoSuchElementException,e:
            imageUrl = CommonsInitValue.initTempImage()
        title = context.find_element_by_class_name('title').text
        descriptContext = context.find_element_by_class_name('text').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'CHINA','ZBNET'])
    return currentArray
Example #5
def crawCNFinanceNetDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_elements_by_class_name('art-list')
    for context in mainlist:
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        descriptContext = context.find_element_by_class_name('pic-details').text
        timeText = context.find_element_by_class_name('time').text
        datetime = CommonsInitValue.returnCreateDate(timeText)
        currentTime = CommonsInitValue.splitCreateDate(timeText,' ',1)
        pubDate =datetime+' '+currentTime
        imageUrl = CommonsInitValue.initTempImage()
        currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'CHINA','21CNNET'])
    return currentList
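
The pubDate assembly above leans on two project helpers; a hedged sketch of the equivalent string handling is shown below, assuming timeText looks like '2018-05-04 09:30' (the real CommonsInitValue helpers may behave differently).

# Hedged sketch: returnCreateDate is assumed to keep the date part and
# splitCreateDate(timeText, ' ', 1) the time part of a 'date time' string.
def build_pub_date(time_text):
    parts = time_text.split(' ')
    date_part = parts[0]                             # e.g. '2018-05-04'
    time_part = parts[1] if len(parts) > 1 else ''   # e.g. '09:30'
    return date_part + ' ' + time_part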
Example #6
def crawMorningOilDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name(
        'news_list_all').find_elements_by_tag_name('li')
    for context in maincontext:
        imageUrl = CommonsInitValue.initoiltempimage()
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'OIL', 'XIDU'
        ])
    return currentArray
Example #7
def crawZBNewsNetDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('l_title')
    for context in contextList:
        pubDate = CommonsInitValue.initNowTime()
        try:
            imageUrl = context.find_element_by_tag_name('img').get_attribute(
                'src')
        except NoSuchElementException, e:
            imageUrl = CommonsInitValue.initTempImage()
        title = context.find_element_by_class_name('title').text
        descriptContext = context.find_element_by_class_name('text').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'ZBNET'
        ])
    return currentArray
def crawDailyOilNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    print '----START CRAW XDNETNEWS OIL NEWS----'
    try:
        XDNetOilNewsSpider.writeMorningOilDailyNews()
    except Exception, e:
        currentList.append([currentTime,str(uuid.uuid1()),'XDNetOilNewsSpider.writeMorningOilDailyNews',e])
Example #9
def crawDailyStockComments(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name("ul-news-list").find_elements_by_tag_name("li")
    for context in mainlist:
        linkUrl = context.find_element_by_tag_name("a").get_attribute("href")
        title = context.text
        pubDate = CommonsInitValue.initNowTime()
        currentList.append([str(uuid.uuid1()), linkUrl, title, pubDate, "[...]", "STOCK", "HGNET"])
    return currentList
def crawFinanceHLDataSource(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    maincontext = browsor.find_element_by_id('news_pic').find_element_by_class_name('changeDiv')
    linkUrl = maincontext.find_element_by_tag_name('a').get_attribute('href')
    pubDate = CommonsInitValue.initNowTime()
    imageUrl = maincontext.find_element_by_tag_name('img').get_attribute('src')
    title = maincontext.find_element_by_tag_name('img').get_attribute('alt')
    currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,'[...]','MACRO','TAKCHINA'])
    return currentList
def crawCNStockNetDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_elements_by_class_name('art-list')
    for context in mainlist:
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        descriptContext = context.find_element_by_class_name(
            'pic-details').text
        timeText = context.find_element_by_class_name('time').text
        datetime = CommonsInitValue.returnCreateDate(timeText)
        currentTime = CommonsInitValue.splitCreateDate(timeText, ' ', 1)
        pubDate = datetime + ' ' + currentTime
        imageUrl = CommonsInitValue.initTempImage()
        currentList.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'STOCK', '21CNNET'
        ])
    return currentList
Example #12
def crawThemeNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW THE IMPORTANT NEWS
    print '----START CRAW THE IMPORTANT NEWS----'
    try:
        ImportantNewsSpider.writeCompanyNews()
    except Exception, e:
        currentList.append([currentTime,str(uuid.uuid1()),'ImportantNewsSpider.writeCompanyNews',e])
def crawDataCenter():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW FOREXGOLD DATA SPIDER
    print '----START CRAW FOREXGOLD DATA----'
    try:
        ForexGoldDataNetSpider.writeForexGoldDataSource()
    except Exception, e:
        currentList.append([currentTime,str(uuid.uuid1()),'ForexGoldDataNetSpider.writeForexGoldDataSource()',e])
Example #14
def crawYiCaiStockDailyNews(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextArray = browsor.find_elements_by_tag_name('dl')
    for context in contextArray:
        try:
            titleValue = context.find_element_by_tag_name('h1')
            descriptContext = context.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
            try:
                imageObj = context.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException, e:
                imageUrl = CommonsInitValue.initTempImage()
        except NoSuchElementException, e:
            continue
        title = titleValue.text
        currentArray.append([str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate, descriptContext, 'STOCK', 'YICAINET'])
    return currentArray
Example #15
def crawYCFinanceHLDataSource(link):
    listArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    courrentContext = browsor.find_elements_by_tag_name('dl')

    for currentDiv in courrentContext:
        try:
            titleObj = currentDiv.find_element_by_tag_name('h1')
            title = titleObj.text
            linkUrl = titleObj.find_element_by_tag_name('a').get_attribute('href')
            descriptContext = currentDiv.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            try:
                imageObj = currentDiv.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException, e:
                imageUrl = CommonsInitValue.initTempImage()
        except NoSuchElementException, e:
            continue
        listArray.append([str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate, descriptContext, 'CHINA', 'YCNET'])
    return listArray
def crawMorningOilDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    mainlist = browsor.find_element_by_id('table').find_elements_by_class_name('evenrow')
    for context in mainlist:
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = CommonsInitValue.initNowTime()
        if title == '':
            continue
        print title+":"+linkUrl
Example #17
def crawFinanceHLDataSource(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainContext = browsor.find_element_by_class_name('column11')
    title = mainContext.find_element_by_tag_name('a').text
    linkUrl = mainContext.find_element_by_tag_name('a').get_attribute('href')
    imageUrl = mainContext.find_element_by_tag_name('img').get_attribute('src')
    descriptContext = mainContext.find_element_by_class_name('lead').text
    pubDate = CommonsInitValue.initNowTime()
    currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'MACRO','FTCHINA'])
    return currentList
Example #18
def crawDailyComments(link):

    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('news-item')
    for mainContext in contextList:
        pubDate = CommonsInitValue.initNowTime()
        title = mainContext.find_element_by_tag_name('a').text
        linkUrl = mainContext.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = mainContext.find_element_by_class_name('desc').text
        currentList.append([str(uuid.uuid1()),linkUrl,title,pubDate,descriptContext,'FOREX','ADSNET'])
    return currentList
def crawDailyOilNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    print '----START CRAW XDNETNEWS OIL NEWS----'
    try:
        XDNetOilNewsSpider.writeMorningOilDailyNews()
    except Exception, e:
        currentList.append([
            currentTime,
            str(uuid.uuid1()), 'XDNetOilNewsSpider.writeMorningOilDailyNews', e
        ])
Example #20
def crawMorningDailyNews(linkUrl):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    resultList = browsor.find_elements_by_class_name('mt24')
    for div in resultList:
        imageUrl = div.find_element_by_tag_name('img').get_attribute('src')
        linkUrl = div.find_element_by_tag_name('a').get_attribute('href')
        title = div.find_element_by_tag_name('a').text
        descriptContext = div.find_element_by_class_name('news-p').text
        pubDate = CommonsInitValue.initNowTime()
        currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'CHINA','NBDNET'])
    return currentList
Example #21
def crawMorningFinanceDailyNews(linkUrl):
    currentArray=[]
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    mainList = browsor.find_element_by_id('list01').find_elements_by_tag_name('li')
    for context in mainList:
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = context.find_element_by_class_name('date').text
        descriptContext = context.find_element_by_tag_name('p').text
        imageUrl = CommonsInitValue.initTempImage()
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'CHINA','IFengNET'])
    return currentArray
Example #22
def crawFinanceHLDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name('news_list').find_elements_by_class_name('list')
    for context in mainlist:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        title = context.find_element_by_class_name('title').text
        linkUrl = context.find_element_by_class_name('title').find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'STOCK','TAKCHINA'])
    return currentArray
Example #23
def crawMorningFinanceDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name('area_left')\
            .find_elements_by_class_name('list_item')
    for context in maincontext:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('h2').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'CHINA','XXCB'])
    return currentArray
Example #24
def crawDailyStockComments(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name('w_660')\
        .find_element_by_tag_name('ul')\
        .find_elements_by_tag_name('li')
    for context in mainlist:
        title = context.find_element_by_tag_name('a').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,title,pubDate,descriptContext,'STOCK','JFNET'])
    return currentArray
def updateDailyForexPic():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    log.info('The system is crawling the forex picture resources')
    print '----START CRAW THE FOREX PICTURE----'
    CnForexImageSpider.writeForexImages()

    print '----START CRAW THE HEXUN PICTURE----'
    try:
        HeXunForexImageSpider.writeHeXunForexImage()
    except Exception,e:
        currentList.append([currentTime,str(uuid.uuid1()),'HeXunForexImageSpider.writeHeXunForexImage',e])
Example #26
def crawDailyMetalComments(link):
    currentArray =[]
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_element_by_class_name('right_box796')\
        .find_element_by_tag_name('ul')\
        .find_elements_by_tag_name('li')
    for context in contextList:
        pubDate = context.find_element_by_class_name('time').text
        title = context.find_element_by_tag_name('a').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = CommonsInitValue.removeSpecialCharacter(context.text)
        currentArray.append([str(uuid.uuid1()),linkUrl,title,pubDate,descriptContext,'METAL','GXNET'])
    return currentArray
Example #27
def crawMorningMetalDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('articleItem')
    for context in contextList:
        try:
            linkUrl = context.find_element_by_class_name('img').get_attribute('href')
            imageUrl = context.find_element_by_class_name('img').find_element_by_tag_name('img').get_attribute('src')
            title = context.find_element_by_class_name('title').text
            pubDate = CommonsInitValue.initNowTime()
            descriptContext = context.find_element_by_tag_name('p').text
        except NoSuchElementException,e:
            continue
        currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'METAL','INVESTINGNET'])
    return currentList
Example #28
def crawFinanceHLDataSource(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainContext = browsor.find_element_by_class_name('column11')
    title = mainContext.find_element_by_tag_name('a').text
    linkUrl = mainContext.find_element_by_tag_name('a').get_attribute('href')
    imageUrl = mainContext.find_element_by_tag_name('img').get_attribute('src')
    descriptContext = mainContext.find_element_by_class_name('lead').text
    pubDate = CommonsInitValue.initNowTime()
    currentList.append([
        str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate, descriptContext,
        'MACRO', 'FTCHINA'
    ])
    return currentList
def crawDailyNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW HEJNEWS COMMENTS NEWS SPIDER
    #print '----START CRAW HEJNEWS NEWS----'
    #HEJNewsNetSpider.writeMorningDailyNews()

    # CRAW QQNEWS COMMENTS NEWS SPIDER
    print '----START CRAW QQNEWS NEWS----'
    try:
        QQNewsNetSpider.writeMorningQQDailyNews()
    except Exception,e:
        print e
Example #30
def crawDailyNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW HEJNEWS COMMENTS NEWS SPIDER
    #print '----START CRAW HEJNEWS NEWS----'
    #HEJNewsNetSpider.writeMorningDailyNews()

    # CRAW QQNEWS COMMENTS NEWS SPIDER
    print '----START CRAW QQNEWS NEWS----'
    try:
        QQNewsNetSpider.writeMorningQQDailyNews()
    except Exception, e:
        print e
Example #31
def crawDataCenter():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW FOREXGOLD DATA SPIDER
    print '----START CRAW FOREXGOLD DATA----'
    try:
        ForexGoldDataNetSpider.writeForexGoldDataSource()
    except Exception, e:
        currentList.append([
            currentTime,
            str(uuid.uuid1()),
            'ForexGoldDataNetSpider.writeForexGoldDataSource()', e
        ])
Example #32
def crawMorningForexDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('articleItem')
    for context in contextList:
        try:
            linkUrl = context.find_element_by_class_name('img').get_attribute('href')
            imageUrl = context.find_element_by_class_name('img').find_element_by_tag_name('img').get_attribute('src')
            title = context.find_element_by_class_name('title').text
            pubDate = CommonsInitValue.initNowTime()
            descriptContext = context.find_element_by_tag_name('p').text
        except NoSuchElementException,e:
            continue
        currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'FOREX','INVESTINGNET'])
    return currentList
Example #33
def crawCnForexImages(link,keyList):
    currentArray = []
    detaiArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    imageList = browsor.find_elements_by_class_name('imgModel')
    for model in imageList:
        linkUrl = model.find_element_by_tag_name('a').get_attribute('href')
        imageUrl = model.find_element_by_tag_name('img').get_attribute('src')
        pubDate = CommonsInitValue.returnCreateDate(model.find_element_by_tag_name('p').text)
        if not (imageUrl in keyList):
            mianId = str(uuid.uuid1())
            currentArray.append([mianId,imageUrl,linkUrl,pubDate,'CNFOREXNET'])
            detaiArray.append([mianId,linkUrl])
    CnForexImageDetailSpider.writeCnForexImageDetail(detaiArray)
    return currentArray
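
A minimal, assumed usage of crawCnForexImages: keyList carries the image src values that are already stored, so only unseen images come back as new rows (the URLs below are placeholders, not real project data).

# Assumed usage sketch for the dedup behaviour above.
if __name__ == '__main__':
    seen_images = ['http://example.com/charts/old.png']
    rows = crawCnForexImages('http://example.com/forex/images', seen_images)
    for row in rows:
        print row  # [mianId, imageUrl, linkUrl, pubDate, 'CNFOREXNET']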
Example #34
def crawMorningFinanceDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    mainList = browsor.find_element_by_id('list01').find_elements_by_tag_name(
        'li')
    for context in mainList:
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = context.find_element_by_class_name('date').text
        descriptContext = context.find_element_by_tag_name('p').text
        imageUrl = CommonsInitValue.initTempImage()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'IFengNET'
        ])
    return currentArray
Example #35
def crawMorningForexDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name("articleItem")
    for context in contextList:
        try:
            linkUrl = context.find_element_by_class_name("img").get_attribute("href")
            imageUrl = context.find_element_by_class_name("img").find_element_by_tag_name("img").get_attribute("src")
            title = context.find_element_by_class_name("title").text
            pubDate = CommonsInitValue.initNowTime()
            descriptContext = context.find_element_by_tag_name("p").text
        except NoSuchElementException, e:
            continue
        currentList.append(
            [str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate, descriptContext, "FOREX", "INVESTINGNET"]
        )
    return currentList
Example #36
def crawDailyMetalComments(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_element_by_class_name('right_box796')\
        .find_element_by_tag_name('ul')\
        .find_elements_by_tag_name('li')
    for context in contextList:
        pubDate = context.find_element_by_class_name('time').text
        title = context.find_element_by_tag_name('a').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = CommonsInitValue.removeSpecialCharacter(context.text)
        currentArray.append([
            str(uuid.uuid1()), linkUrl, title, pubDate, descriptContext,
            'METAL', 'GXNET'
        ])
    return currentArray
Example #37
def crawDailyComments(link):

    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('news-item')
    for mainContext in contextList:
        pubDate = CommonsInitValue.initNowTime()
        title = mainContext.find_element_by_tag_name('a').text
        linkUrl = mainContext.find_element_by_tag_name('a').get_attribute(
            'href')
        descriptContext = mainContext.find_element_by_class_name('desc').text
        currentList.append([
            str(uuid.uuid1()), linkUrl, title, pubDate, descriptContext,
            'FOREX', 'ADSNET'
        ])
    return currentList
def crawMorningFinanceDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name('area_left')\
            .find_elements_by_class_name('list_item')
    for context in maincontext:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('h2').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'XXCB'
        ])
    return currentArray
Example #39
def crawDailyStockComments(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name('w_660')\
        .find_element_by_tag_name('ul')\
        .find_elements_by_tag_name('li')
    for context in mainlist:
        title = context.find_element_by_tag_name('a').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, title, pubDate, descriptContext,
            'STOCK', 'JFNET'
        ])
    return currentArray
def crawFinanceHLDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name(
        'news_list').find_elements_by_class_name('list')
    for context in mainlist:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        title = context.find_element_by_class_name('title').text
        linkUrl = context.find_element_by_class_name(
            'title').find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'STOCK', 'TAKCHINA'
        ])
    return currentArray
def crawCnForexImages(link, keyList):
    currentArray = []
    detaiArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    imageList = browsor.find_elements_by_class_name('imgModel')
    for model in imageList:
        linkUrl = model.find_element_by_tag_name('a').get_attribute('href')
        imageUrl = model.find_element_by_tag_name('img').get_attribute('src')
        pubDate = CommonsInitValue.returnCreateDate(
            model.find_element_by_tag_name('p').text)
        if not (imageUrl in keyList):
            mianId = str(uuid.uuid1())
            currentArray.append(
                [mianId, imageUrl, linkUrl, pubDate, 'CNFOREXNET'])
            detaiArray.append([mianId, linkUrl])
    CnForexImageDetailSpider.writeCnForexImageDetail(detaiArray)
    return currentArray
Example #42
def crawMorningDailyNews(link):
    listArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainContext = browsor.find_element_by_id('listArticle')
    listContext = mainContext.find_elements_by_class_name('boxa')
    initImage = CommonsInitValue.initTempImage()
    for context in listContext:
        try:
            imageContext = context.find_element_by_class_name('pic')
            imageUrl = imageContext.find_element_by_tag_name('img').get_attribute('src')
        except NoSuchElementException,e:
            imageUrl = initImage
        title = context.find_element_by_tag_name('h4').text
        linkUrl = context.find_element_by_tag_name('h4')\
                          .find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = time.strftime("%Y-%m-%d %X",time.localtime())
        listArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'STOCK','CXNET'])
    return listArray
Example #43
def crawHeXunForexImage(link,keyList):
    currentArray =[]
    detaiArray=[]
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    imageList = browsor.find_element_by_class_name('tupianpindao')
    mainList = imageList.find_elements_by_tag_name('div')
    for context in mainList:
        try:
            linkObj = context.find_element_by_tag_name('a')
            linkUrl = linkObj.get_attribute('href')
            imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
            pubDate = CommonsInitValue.splitCreateDate(linkUrl,'/',3)
            descriptContext = context.find_element_by_tag_name('p').text
            if not (imageUrl in keyList):
                mianId = str(uuid.uuid1())
                currentArray.append([mianId,imageUrl,linkUrl,pubDate,'HEXUNFOREXNET',descriptContext])
                detaiArray.append([mianId,linkUrl])
        except NoSuchElementException,e:
            continue
Example #44
def crawMorningDailyNews(link):
    listArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainContext = browsor.find_element_by_id('listArticle')
    listContext = mainContext.find_elements_by_class_name('boxa')
    initImage = CommonsInitValue.initTempImage()
    for context in listContext:
        try:
            imageContext = context.find_element_by_class_name('pic')
            imageUrl = imageContext.find_element_by_tag_name(
                'img').get_attribute('src')
        except NoSuchElementException, e:
            imageUrl = initImage
        title = context.find_element_by_tag_name('h4').text
        linkUrl = context.find_element_by_tag_name('h4')\
                          .find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = time.strftime("%Y-%m-%d %X", time.localtime())
        listArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'STOCK', 'CXNET'
        ])
    return listArray
Example #45
def crawHeXunForexImage(link, keyList):
    currentArray = []
    detaiArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    imageList = browsor.find_element_by_class_name('tupianpindao')
    mainList = imageList.find_elements_by_tag_name('div')
    for context in mainList:
        try:
            linkObj = context.find_element_by_tag_name('a')
            linkUrl = linkObj.get_attribute('href')
            imageUrl = context.find_element_by_tag_name('img').get_attribute(
                'src')
            pubDate = CommonsInitValue.splitCreateDate(linkUrl, '/', 3)
            descriptContext = context.find_element_by_tag_name('p').text
            if not (imageUrl in keyList):
                mianId = str(uuid.uuid1())
                currentArray.append([
                    mianId, imageUrl, linkUrl, pubDate, 'HEXUNFOREXNET',
                    descriptContext
                ])
                detaiArray.append([mianId, linkUrl])
        except NoSuchElementException, e:
            continue