Example no. 1
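All of the snippets in these examples drive a headless PhantomJS browser through Selenium's legacy find_element_by_* API and collect each news block as a row of metadata. They target Python 2 (note the print statements) and rely on imports along the lines of the sketch below; CommonsInitValue and the various *Spider modules are project-local helpers, so their import paths here are an assumption.

import uuid

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Project-local helpers used throughout these examples (module paths assumed, code not shown):
# import CommonsInitValue
# import XDNetOilNewsSpider, ImportantNewsSpider, ForexGoldDataNetSpider, QQNewsNetSpider
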
def crawYCFinanceHLDataSource(link):
    listArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    courrentContext = browsor.find_elements_by_tag_name('dl')

    for currentDiv in courrentContext:
        try:
            titleObj = currentDiv.find_element_by_tag_name('h1')
            title = titleObj.text
            linkUrl = titleObj.find_element_by_tag_name('a').get_attribute(
                'href')
            descriptContext = currentDiv.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            try:
                imageObj = currentDiv.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException:
                imageUrl = CommonsInitValue.initTempImage()
        except NoSuchElementException:
            continue
        listArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'YCNET'
        ])
    return listArray
def crawYiCaiStockDailyNews(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextArray = browsor.find_elements_by_tag_name('dl')
    for context in contextArray:
        try:
            titleValue = context.find_element_by_tag_name('h1')
            descriptContext = context.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            linkUrl = context.find_element_by_tag_name('a').get_attribute(
                'href')
            try:
                imageObj = context.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException:
                imageUrl = CommonsInitValue.initTempImage()

        except NoSuchElementException:
            continue
        title = titleValue.text
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'STOCK', 'YICAINET'
        ])
    return currentArray
def crawDailyOilNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    print '----START CRAW XDNETNEWS OIL NEWS----'
    try:
        XDNetOilNewsSpider.writeMorningOilDailyNews()
    except Exception as e:
        currentList.append([currentTime, str(uuid.uuid1()),
                            'XDNetOilNewsSpider.writeMorningOilDailyNews', e])
    return currentList
Example no. 4
def crawDailyStockComments(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name("ul-news-list").find_elements_by_tag_name("li")
    for context in mainlist:
        linkUrl = context.find_element_by_tag_name("a").get_attribute("href")
        title = context.text
        pubDate = CommonsInitValue.initNowTime()
        currentList.append([str(uuid.uuid1()), linkUrl, title, pubDate, "[...]", "STOCK", "HGNET"])
    return currentList
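Each of these crawlers builds a list of rows whose last two fields tag the category and the source site. A minimal, hypothetical caller (the URL below is a placeholder, not one used by the original project) could look like this:

rows = crawDailyStockComments('http://example.com/stock/comments')  # placeholder URL
for row in rows:
    # row layout: [uuid, linkUrl, title, pubDate, description, category, source]
    print row[2] + ' -> ' + row[1]
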
def crawFinanceHLDataSource(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    maincontext = browsor.find_element_by_id('news_pic').find_element_by_class_name('changeDiv')
    linkUrl = maincontext.find_element_by_tag_name('a').get_attribute('href')
    pubDate = CommonsInitValue.initNowTime()
    imageUrl = maincontext.find_element_by_tag_name('img').get_attribute('src')
    title = maincontext.find_element_by_tag_name('img').get_attribute('alt')
    currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,'[...]','MACRO','TAKCHINA'])
    return currentList
Example no. 6
def crawThemeNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW THE IMPORTANT NEWS
    print '----START CRAW THE IMPORTANT NEWS----'
    try:
        ImportantNewsSpider.writeCompanyNews()
    except Exception as e:
        currentList.append([currentTime, str(uuid.uuid1()),
                            'ImportantNewsSpider.writeCompanyNews', e])
    return currentList
def crawDataCenter():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW FOREXGOLD DATA SPIDER
    print '----START CRAW FOREXGOLD DATA----'
    try:
        ForexGoldDataNetSpider.writeForexGoldDataSource()
    except Exception as e:
        currentList.append([currentTime, str(uuid.uuid1()),
                            'ForexGoldDataNetSpider.writeForexGoldDataSource()', e])
    return currentList
def crawMorningOilDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    mainlist = browsor.find_element_by_id('table').find_elements_by_class_name('evenrow')
    for context in mainlist:
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = CommonsInitValue.initNowTime()
        if title == '':
            continue
        print title + ":" + linkUrl
Example no. 9
def crawFinanceHLDataSource(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainContext = browsor.find_element_by_class_name('column11')
    title = mainContext.find_element_by_tag_name('a').text
    linkUrl = mainContext.find_element_by_tag_name('a').get_attribute('href')
    imageUrl = mainContext.find_element_by_tag_name('img').get_attribute('src')
    descriptContext = mainContext.find_element_by_class_name('lead').text
    pubDate = CommonsInitValue.initNowTime()
    currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'MACRO','FTCHINA'])
    return currentList
Example no. 10
def crawMorningOilDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name('news_list_all').find_elements_by_tag_name('li')
    for context in maincontext:
        imageUrl = CommonsInitValue.initoiltempimage()
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'OIL','XIDU'])
    return currentArray
Example no. 11
def crawFinanceHLDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name('news_list').find_elements_by_class_name('list')
    for context in mainlist:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        title = context.find_element_by_class_name('title').text
        linkUrl = context.find_element_by_class_name('title').find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'STOCK','TAKCHINA'])
    return currentArray
def crawMorningDailyNews(linkUrl):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    resultList = browsor.find_elements_by_class_name('mt24')
    for div in resultList:
        imageUrl = div.find_element_by_tag_name('img').get_attribute('src')
        linkUrl = div.find_element_by_tag_name('a').get_attribute('href')
        title = div.find_element_by_tag_name('a').text
        descriptContext = div.find_element_by_class_name('news-p').text
        pubDate = CommonsInitValue.initNowTime()
        currentList.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'CHINA','NBDNET'])
    return currentList
def crawDailyOilNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    print '----START CRAW XDNETNEWS OIL NEWS----'
    try:
        XDNetOilNewsSpider.writeMorningOilDailyNews()
    except Exception as e:
        currentList.append([
            currentTime,
            str(uuid.uuid1()), 'XDNetOilNewsSpider.writeMorningOilDailyNews', e
        ])
    return currentList
Example no. 14
def crawDailyComments(link):

    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('news-item')
    for mainContext in contextList:
        pubDate = CommonsInitValue.initNowTime()
        title = mainContext.find_element_by_tag_name('a').text
        linkUrl = mainContext.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = mainContext.find_element_by_class_name('desc').text
        currentList.append([str(uuid.uuid1()),linkUrl,title,pubDate,descriptContext,'FOREX','ADSNET'])
    return currentList
def crawMorningFinanceDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name('area_left')\
            .find_elements_by_class_name('list_item')
    for context in maincontext:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('h2').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,imageUrl,title,pubDate,descriptContext,'CHINA','XXCB'])
    return currentArray
def updateDailyForexPic():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    log.info('The system crawling the resource of forex picture ')
    print '----START CRAW THE FOREX PICTURE----'
    CnForexImageSpider.writeForexImages()

    print '----START CRAW THE HEXUN PICTURE----'
    try:
        HeXunForexImageSpider.writeHeXunForexImage()
    except Exception as e:
        currentList.append([currentTime, str(uuid.uuid1()),
                            'HeXunForexImageSpider.writeHeXunForexImage', e])
    return currentList
Example no. 17
def crawDailyStockComments(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name('w_660')\
        .find_element_by_tag_name('ul')\
        .find_elements_by_tag_name('li')
    for context in mainlist:
        title = context.find_element_by_tag_name('a').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([str(uuid.uuid1()),linkUrl,title,pubDate,descriptContext,'STOCK','JFNET'])
    return currentArray
def crawMorningMetalDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('articleItem')
    for context in contextList:
        try:
            linkUrl = context.find_element_by_class_name('img').get_attribute('href')
            imageUrl = context.find_element_by_class_name('img').find_element_by_tag_name('img').get_attribute('src')
            title = context.find_element_by_class_name('title').text
            pubDate = CommonsInitValue.initNowTime()
            descriptContext = context.find_element_by_tag_name('p').text
        except NoSuchElementException:
            continue
        currentList.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'METAL', 'INVESTINGNET'
        ])
    return currentList
Example no. 19
def crawFinanceHLDataSource(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainContext = browsor.find_element_by_class_name('column11')
    title = mainContext.find_element_by_tag_name('a').text
    linkUrl = mainContext.find_element_by_tag_name('a').get_attribute('href')
    imageUrl = mainContext.find_element_by_tag_name('img').get_attribute('src')
    descriptContext = mainContext.find_element_by_class_name('lead').text
    pubDate = CommonsInitValue.initNowTime()
    currentList.append([
        str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate, descriptContext,
        'MACRO', 'FTCHINA'
    ])
    return currentList
Example no. 20
def crawDailyNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW HEJNEWS COMMENTS NEWS SPIDER
    #print '----START CRAW HEJNEWS NEWS----'
    #HEJNewsNetSpider.writeMorningDailyNews()

    # CRAW QQNEWS COMMENTS NEWS SPIDER
    print '----START CRAW QQNEWS NEWS----'
    try:
        QQNewsNetSpider.writeMorningQQDailyNews()
    except Exception as e:
        print e
Example no. 21
def crawMorningForexDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('articleItem')
    for context in contextList:
        try:
            linkUrl = context.find_element_by_class_name('img').get_attribute('href')
            imageUrl = context.find_element_by_class_name('img').find_element_by_tag_name('img').get_attribute('src')
            title = context.find_element_by_class_name('title').text
            pubDate = CommonsInitValue.initNowTime()
            descriptContext = context.find_element_by_tag_name('p').text
        except NoSuchElementException:
            continue
        currentList.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'FOREX', 'INVESTINGNET'
        ])
    return currentList
Example no. 22
def crawDataCenter():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW FOREXGOLD DATA SPIDER
    print '----START CRAW FOREXGOLD DATA----'
    try:
        ForexGoldDataNetSpider.writeForexGoldDataSource()
    except Exception as e:
        currentList.append([
            currentTime,
            str(uuid.uuid1()),
            'ForexGoldDataNetSpider.writeForexGoldDataSource()', e
        ])
    return currentList
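crawDataCenter, like the other top-level aggregator functions in these examples, only records a row when a spider call raises, so the list it returns is effectively a small failure log. A hypothetical consumer of those rows (the field order follows the append above) might be:

failures = crawDataCenter()
for record in failures:
    # record layout: [timestamp, uuid, name of the failed call, exception]
    print '%s: %s failed with %s' % (record[0], record[2], record[3])
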
Example no. 23
def crawZBNewsNetDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('l_title')
    for context in contextList:
        pubDate = CommonsInitValue.initNowTime()
        try:
            imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        except NoSuchElementException:
            imageUrl = CommonsInitValue.initTempImage()
        title = context.find_element_by_class_name('title').text
        descriptContext = context.find_element_by_class_name('text').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'ZBNET'
        ])
    return currentArray
def crawDailyNews():

    currentList = []
    currentTime = CommonsInitValue.initNowTime()

    # CRAW HEJNEWS COMMENTS NEWS SPIDER
    #print '----START CRAW HEJNEWS NEWS----'
    #HEJNewsNetSpider.writeMorningDailyNews()
    
    # CRAW QQNEWS COMMENTS NEWS SPIDER
    print '----START CRAW QQNEWS NEWS----'
    try:
        QQNewsNetSpider.writeMorningQQDailyNews()
    except Exception as e:
        print e
def crawMorningForexDailyNews(link):
    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name("articleItem")
    for context in contextList:
        try:
            linkUrl = context.find_element_by_class_name("img").get_attribute("href")
            imageUrl = context.find_element_by_class_name("img").find_element_by_tag_name("img").get_attribute("src")
            title = context.find_element_by_class_name("title").text
            pubDate = CommonsInitValue.initNowTime()
            descriptContext = context.find_element_by_tag_name("p").text
        except NoSuchElementException:
            continue
        currentList.append(
            [str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate, descriptContext, "FOREX", "INVESTINGNET"]
        )
    return currentList
def crawMorningFinanceDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name('area_left')\
            .find_elements_by_class_name('list_item')
    for context in maincontext:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('h2').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'XXCB'
        ])
    return currentArray
Example no. 27
def crawMorningOilDailyNews(linkUrl):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(linkUrl)
    maincontext = browsor.find_element_by_class_name(
        'news_list_all').find_elements_by_tag_name('li')
    for context in maincontext:
        imageUrl = CommonsInitValue.initoiltempimage()
        descriptContext = context.find_element_by_tag_name('p').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        title = context.find_element_by_tag_name('a').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'OIL', 'XIDU'
        ])
    return currentArray
Example no. 28
def crawDailyStockComments(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name('w_660')\
        .find_element_by_tag_name('ul')\
        .find_elements_by_tag_name('li')
    for context in mainlist:
        title = context.find_element_by_tag_name('a').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, title, pubDate, descriptContext,
            'STOCK', 'JFNET'
        ])
    return currentArray
Example no. 29
def crawDailyComments(link):

    currentList = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('news-item')
    for mainContext in contextList:
        pubDate = CommonsInitValue.initNowTime()
        title = mainContext.find_element_by_tag_name('a').text
        linkUrl = mainContext.find_element_by_tag_name('a').get_attribute(
            'href')
        descriptContext = mainContext.find_element_by_class_name('desc').text
        currentList.append([
            str(uuid.uuid1()), linkUrl, title, pubDate, descriptContext,
            'FOREX', 'ADSNET'
        ])
    return currentList
def crawFinanceHLDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    mainlist = browsor.find_element_by_class_name(
        'news_list').find_elements_by_class_name('list')
    for context in mainlist:
        imageUrl = context.find_element_by_tag_name('img').get_attribute('src')
        title = context.find_element_by_class_name('title').text
        linkUrl = context.find_element_by_class_name(
            'title').find_element_by_tag_name('a').get_attribute('href')
        descriptContext = context.find_element_by_tag_name('p').text
        pubDate = CommonsInitValue.initNowTime()
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'STOCK', 'TAKCHINA'
        ])
    return currentArray
Example no. 31
def crawZBNewsNetDataSource(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextList = browsor.find_elements_by_class_name('l_title')
    for context in contextList:
        pubDate = CommonsInitValue.initNowTime()
        try:
            imageUrl = context.find_element_by_tag_name('img').get_attribute(
                'src')
        except NoSuchElementException:
            imageUrl = CommonsInitValue.initTempImage()
        title = context.find_element_by_class_name('title').text
        descriptContext = context.find_element_by_class_name('text').text
        linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'ZBNET'
        ])
    return currentArray
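Several of these crawlers repeat the same fallback: read the img src inside a block and fall back to CommonsInitValue.initTempImage() when the block has no image. A small helper along these lines (not part of the original code; the name is made up) would remove that duplication:

def safe_image_url(element, fallback):
    # Return the src of the first <img> inside element, or fallback when none exists.
    try:
        return element.find_element_by_tag_name('img').get_attribute('src')
    except NoSuchElementException:
        return fallback

Inside the loops above, the try/except around the image lookup would then collapse to imageUrl = safe_image_url(context, CommonsInitValue.initTempImage()).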
Example no. 32
def crawYiCaiStockDailyNews(link):
    currentArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    contextArray = browsor.find_elements_by_tag_name('dl')
    for context in contextArray:
        try:
            titleValue = context.find_element_by_tag_name('h1')
            descriptContext = context.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            linkUrl = context.find_element_by_tag_name('a').get_attribute('href')
            try:
                imageObj = context.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException:
                imageUrl = CommonsInitValue.initTempImage()
        except NoSuchElementException:
            continue
        title = titleValue.text
        currentArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'STOCK', 'YICAINET'
        ])
    return currentArray
Example no. 33
def crawYCFinanceHLDataSource(link):
    listArray = []
    browsor = webdriver.PhantomJS()
    browsor.get(link)
    courrentContext = browsor.find_elements_by_tag_name('dl')

    for currentDiv in courrentContext:
        try:
            titleObj = currentDiv.find_element_by_tag_name('h1')
            title = titleObj.text
            linkUrl = titleObj.find_element_by_tag_name('a').get_attribute('href')
            descriptContext = currentDiv.find_element_by_tag_name('p').text
            pubDate = CommonsInitValue.initNowTime()
            try:
                imageObj = currentDiv.find_element_by_tag_name('img')
                imageUrl = imageObj.get_attribute('src')
            except NoSuchElementException:
                imageUrl = CommonsInitValue.initTempImage()
        except NoSuchElementException:
            continue
        listArray.append([
            str(uuid.uuid1()), linkUrl, imageUrl, title, pubDate,
            descriptContext, 'CHINA', 'YCNET'
        ])
    return listArray