# Shared imports for the functions below (Python 2 codebase: urllib2 and
# print statements). Clarity, Bm25, util, and Constants are project-local.
import json
import os
import re
import traceback
import urllib2
from collections import Counter

import numpy as np
from bs4 import BeautifulSoup
from gensim import corpora, models, similarities
from numpy import zeros


def RecommendationMetric():
    todayDateFolder = util.getTodayDateFolder()
    lastRecommended = util.loadSettings(Constants.LAST_RECOMMENDATION_DONE)
    if todayDateFolder == lastRecommended:
        return True
    try:
        # Get Relevance json
        relevance_json = {}
        readRelevanceDir = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                        Constants.ENGINE_DIR, todayDateFolder,
                                        Constants.RELEVANCE_FILE)
        if os.path.isfile(readRelevanceDir):
            with open(readRelevanceDir) as json_data:
                relevance_json = json.load(json_data)
        # Get Smoothness json
        smoothness_json = {}
        readSmoothnessDir = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                         Constants.ENGINE_DIR, todayDateFolder,
                                         Constants.SMOOTHNESS_FILE)
        if os.path.isfile(readSmoothnessDir):
            with open(readSmoothnessDir) as json_data:
                smoothness_json = json.load(json_data)
        # Get Clarity json
        clarity_json = {}
        readClarityDir = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                      Constants.ENGINE_DIR, todayDateFolder,
                                      Constants.CLARITY_FILE)
        if os.path.isfile(readClarityDir):
            with open(readClarityDir) as json_data:
                clarity_json = json.load(json_data)
        # No linear weighting formula for now: Counter.update() simply adds
        # the scores of documents that appear in more than one signal.
        cou = Counter()
        cou.update(relevance_json)
        cou.update(smoothness_json)
        cou.update(clarity_json)
        # Convert back to a plain dictionary
        final_json = dict(cou)
        result = printRecommendedDocs(final_json, todayDateFolder)
        if result == True:
            util.saveSettings(Constants.LAST_RECOMMENDATION_DONE, todayDateFolder)
            util.logger.info("Recommended links done for =" + todayDateFolder)
    except Exception:
        util.logger.error("Exception at recommending links for : %s Exception = %s"
                          % (todayDateFolder, traceback.format_exc()))
# Variant of RecommendationMetric that reads the per-signal scores from the
# GOOGLENEWS subdirectory of the engine folder.
def RecommendationMetric():
    todayDateFolder = util.getTodayDateFolder()
    lastRecommended = util.loadSettings(Constants.LAST_RECOMMENDATION_DONE)
    if todayDateFolder == lastRecommended:
        return True
    try:
        # Get Relevance json
        relevance_json = {}
        readRelevanceDir = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                        Constants.ENGINE_DIR, todayDateFolder,
                                        Constants.GOOGLENEWS, Constants.RELEVANCE_FILE)
        if os.path.isfile(readRelevanceDir):
            with open(readRelevanceDir) as json_data:
                relevance_json = json.load(json_data)
        # Get Smoothness json
        smoothness_json = {}
        readSmoothnessDir = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                         Constants.ENGINE_DIR, todayDateFolder,
                                         Constants.GOOGLENEWS, Constants.SMOOTHNESS_FILE)
        if os.path.isfile(readSmoothnessDir):
            with open(readSmoothnessDir) as json_data:
                smoothness_json = json.load(json_data)
        # Get Clarity json
        clarity_json = {}
        readClarityDir = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                      Constants.ENGINE_DIR, todayDateFolder,
                                      Constants.GOOGLENEWS, Constants.CLARITY_FILE)
        if os.path.isfile(readClarityDir):
            with open(readClarityDir) as json_data:
                clarity_json = json.load(json_data)
        # No linear weighting formula for now: Counter.update() simply adds
        # the scores of documents that appear in more than one signal.
        cou = Counter()
        cou.update(relevance_json)
        cou.update(smoothness_json)
        cou.update(clarity_json)
        # Convert back to a plain dictionary
        final_json = dict(cou)
        result = printRecommendedDocs(final_json, todayDateFolder)
        if result == True:
            util.saveSettings(Constants.LAST_RECOMMENDATION_DONE, todayDateFolder)
            util.logger.info("Recommended Google links done for =" + todayDateFolder)
    except Exception:
        util.logger.error("Exception at recommending google links for : %s Exception = %s"
                          % (todayDateFolder, traceback.format_exc()))
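# Counter.update() with a mapping *adds* values for shared keys (unlike
# dict.update(), which overwrites), so documents scored by several signals
# accumulate. A toy illustration of the merge above (scores are invented):
from collections import Counter

relevance = {'doc_a': 0.50, 'doc_b': 0.25}
smoothness = {'doc_a': 0.25, 'doc_c': 0.75}
clarity = {'doc_b': 0.25}

cou = Counter()
cou.update(relevance)
cou.update(smoothness)
cou.update(clarity)
print(dict(cou))  # doc_a: 0.75, doc_b: 0.5, doc_c: 0.75 (key order may vary)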
def downloadGoogleNews(downloadDate):
    result = False
    read_directory = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                  Constants.GOOGLE_LINKS_DIR, downloadDate,
                                  Constants.GOOGLE_LINKS_FILE)
    write_directory = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                   Constants.GOOGLE_NEWS_DIR, downloadDate)
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    writeJson = {}
    try:
        with open(read_directory) as f:
            hyperlinks = [line.strip() for line in f]
        count = 0
        for link in hyperlinks:
            # Build a short, filesystem-safe file name from the URL.
            url = link.replace("http://", "")
            url = url.replace("www.", "")
            parsedUrl = re.sub(r'\W+', '', url)
            if len(parsedUrl) > 25:
                parsedUrl = parsedUrl[:25]
            try:
                html_filename = os.path.join(write_directory, parsedUrl)
                if not os.path.isfile(html_filename):
                    htmlfile = urllib2.urlopen(link)
                    html = htmlfile.read()
                    ret = util.writeToFile(html, html_filename)
                    if ret == True:
                        linkDict = {}
                        linkDict["url"] = link
                        linkDict["content"] = ""
                        soup = BeautifulSoup(html, 'html.parser')
                        # Guard the contents list itself: it can be empty,
                        # in which case contents[0] would raise IndexError.
                        if soup.title and soup.title.contents:
                            title = soup.title.contents[0]
                        else:
                            title = ""
                        linkDict["title"] = title
                        writeJson[parsedUrl] = linkDict
                        count = count + 1
                        print 'downloaded link =' + url
            except Exception:
                util.logger.error("Exception at downloading link : %s" % url)
        if count > Constants.MIN_GOOGLELINKS_DAILY:
            result = writeUrlJson(writeJson, downloadDate)
            if result == True:
                util.saveSettings(Constants.LAST_GOOGLENEWS_DOWNLOAD, downloadDate)
                util.logger.info("Google news downloaded for =" + downloadDate +
                                 " links=" + str(count))
            else:
                util.logger.error("Google news failed to download for =" + downloadDate +
                                  " links=" + str(count))
    except Exception:
        print "Exception at open Google news links for download: %s" % read_directory
    return result
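# The per-link file name is just the URL with the scheme and "www." stripped,
# all non-word characters removed, and the result truncated to 25 characters.
# A quick illustration (the URL is invented):
import re

link = 'http://www.example.com/news/2016/story-one.html'
url = link.replace('http://', '').replace('www.', '')
print(re.sub(r'\W+', '', url)[:25])  # examplecomnews2016storyon
# Note: distinct URLs can collide after truncation; later links that map to an
# existing file are skipped by the os.path.isfile() check above.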
# Variant that scores both the normal and the suggested-links test sets
# against yesterday's training corpus.
def ConnectionClarity():
    todayDate = util.getYesterdayDateFolder()
    lastClarityDate = util.loadSettings(Constants.LAST_CLARITY_DIR)
    lastSuggClarityDate = util.loadSettings(Constants.LAST_SUGG_CLARITY_DIR)
    if lastClarityDate:
        util.logger.info("Google Clarity done last for =" + lastClarityDate)
    else:
        util.logger.info("Google Clarity done last for none")
    if lastSuggClarityDate:
        util.logger.info("Sugg Clarity done last for =" + lastSuggClarityDate)
    else:
        util.logger.info("Sugg Clarity done last for none")
    if todayDate == lastClarityDate and todayDate == lastSuggClarityDate:
        util.logger.info("Clarity signal done for today =" + todayDate)
        return True
    trainFiles = util.findTrainingFiles()
    trainFiles = util.random_select(trainFiles)
    trainCorpus, usedTrainFiles = util.findCorpus(trainFiles)
    normalClarity = True
    if todayDate != lastClarityDate:
        testFiles = util.findTestFiles()
        testCorpus, usedTestFiles = util.findCorpus(testFiles)
        clarityobj = Clarity(trainCorpus, testCorpus)
        clarityScore = clarityobj.ClarityScore()
        normalClarity = printNormalRankedDocs(clarityScore, usedTestFiles)
        if normalClarity == True:
            util.saveSettings(Constants.LAST_CLARITY_DIR, todayDate)
            util.logger.info("Google Clarity info just completed for =" + todayDate)
    suggClarity = True
    if todayDate != lastSuggClarityDate:
        testFiles = util.findSuggTestFiles()
        testCorpus, usedTestFiles = util.findCorpus(testFiles)
        clarityobj = Clarity(trainCorpus, testCorpus)
        clarityScore = clarityobj.ClarityScore()
        suggClarity = printSuggRankedDocs(clarityScore, usedTestFiles)
        if suggClarity == True:
            util.saveSettings(Constants.LAST_SUGG_CLARITY_DIR, todayDate)
            util.logger.info("Sugg Google Clarity info just completed for =" + todayDate)
    return normalClarity or suggClarity
# Simpler variant: scores only today's test set.
def ConnectionClarity():
    todayDate = util.getTodayDateFolder()
    lastClarityDate = util.loadSettings(Constants.LAST_CLARITY_DIR)
    if todayDate == lastClarityDate:
        util.logger.info("Clarity signal done for today =" + todayDate)
        return True
    trainFiles = util.findTrainingFiles()
    testFiles = util.findTestFiles()
    trainCorpus, usedTrainFiles = util.findCorpus(trainFiles)
    testCorpus, usedTestFiles = util.findCorpus(testFiles)
    clarityobj = Clarity(trainCorpus, testCorpus)
    clarityScore = clarityobj.ClarityScore()
    ret = printRankedDocs(clarityScore, usedTestFiles)
    if ret == True:
        util.saveSettings(Constants.LAST_CLARITY_DIR, todayDate)
        util.logger.info("Clarity info just completed for =" + todayDate)
    return ret
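# The Clarity class is defined elsewhere in the repo and isn't shown here. In
# the IR literature a "clarity score" is commonly the KL divergence between a
# document's language model and the collection's; a minimal sketch under that
# assumption (the real ClarityScore may differ):
import math
from collections import Counter

def clarity_score(test_doc, collection_docs, eps=1e-9):
    doc_counts = Counter(test_doc)
    doc_total = float(sum(doc_counts.values()))
    coll_counts = Counter(w for doc in collection_docs for w in doc)
    coll_total = float(sum(coll_counts.values()))
    score = 0.0
    for word, cnt in doc_counts.items():
        p_doc = cnt / doc_total
        p_coll = coll_counts[word] / coll_total + eps  # eps avoids log(0)
        score += p_doc * math.log(p_doc / p_coll, 2)
    return score  # higher = the document is more focused w.r.t. the collection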
def Relevance():
    todayDate = util.getTodayDateFolder()
    lastRelevanceDate = util.loadSettings(Constants.LAST_RELEVANCE_DIR)
    if todayDate == lastRelevanceDate:
        util.logger.info("Relevance signal already done for today :" + todayDate)
        return True
    trainFiles = util.findTrainingFiles()
    testFiles = util.findTestFiles()
    trainCorpus, usedTrainFiles = util.findCorpus(trainFiles)
    testCorpus, usedTestFiles = util.findCorpus(testFiles)
    # Drop hapax legomena (words occurring only once in the training corpus).
    all_tokens = sum(trainCorpus, [])
    token_counts = Counter(all_tokens)
    tokens_once = set(word for word, cnt in token_counts.items() if cnt == 1)
    texts = [[word for word in text if word not in tokens_once]
             for text in trainCorpus]
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    tfidf = models.TfidfModel(corpus=corpus, id2word=dictionary, normalize=True)
    index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=len(dictionary))
    count = 0
    testJson = {}
    for text in testCorpus:
        vec = dictionary.doc2bow(text)
        sims = index[tfidf[vec]]
        # Relevance of a test doc = sum of its similarities to all training docs.
        score = sum(sims)
        testJson[usedTestFiles[count]] = score
        count = count + 1
    ret = printRankedDocs(testJson)
    if ret == True:
        util.saveSettings(Constants.LAST_RELEVANCE_DIR, todayDate)
        util.logger.info("Relevance info just completed for =" + todayDate)
    return ret
# Variant that scores both the normal and the suggested-links test sets
# against yesterday's training corpus.
def Relevance():
    todayDate = util.getYesterdayDateFolder()
    lastRelevanceDate = util.loadSettings(Constants.LAST_RELEVANCE_DIR)
    lastSuggRelevanceDate = util.loadSettings(Constants.LAST_SUGG_RELEVANCE_DIR)
    if lastRelevanceDate:
        util.logger.info("Google Relevance done last for =" + lastRelevanceDate)
    else:
        util.logger.info("Google Relevance done last for None")
    if lastSuggRelevanceDate:
        util.logger.info("Sugg Relevance done last for =" + lastSuggRelevanceDate)
    else:
        util.logger.info("Sugg Relevance done last for None")
    if todayDate == lastRelevanceDate and todayDate == lastSuggRelevanceDate:
        util.logger.info("Relevance signal already done for today :" + todayDate)
        return True
    # Build the TF-IDF similarity index once from the training corpus;
    # both test sets are then scored against it.
    trainFiles = util.findTrainingFiles()
    trainCorpus, usedTrainFiles = util.findCorpus(trainFiles)
    all_tokens = sum(trainCorpus, [])
    token_counts = Counter(all_tokens)
    tokens_once = set(word for word, cnt in token_counts.items() if cnt == 1)
    texts = [[word for word in text if word not in tokens_once]
             for text in trainCorpus]
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    tfidf = models.TfidfModel(corpus=corpus, id2word=dictionary, normalize=True)
    index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=len(dictionary))
    normalRelevance = True
    if todayDate != lastRelevanceDate:
        testFiles = util.findTestFiles()
        testCorpus, usedTestFiles = util.findCorpus(testFiles)
        count = 0
        testJson = {}
        for text in testCorpus:
            vec = dictionary.doc2bow(text)
            sims = index[tfidf[vec]]
            score = sum(sims)
            testJson[usedTestFiles[count]] = score
            count = count + 1
        normalRelevance = printNormalRankedDocs(testJson)
        if normalRelevance == True:
            util.saveSettings(Constants.LAST_RELEVANCE_DIR, todayDate)
            util.logger.info("Google Relevance info just completed for =" + todayDate)
    suggRelevance = True
    if todayDate != lastSuggRelevanceDate:
        testFiles = util.findSuggTestFiles()
        testCorpus, usedTestFiles = util.findCorpus(testFiles)
        count = 0
        testJson = {}
        for text in testCorpus:
            vec = dictionary.doc2bow(text)
            sims = index[tfidf[vec]]
            score = sum(sims)
            testJson[usedTestFiles[count]] = score
            count = count + 1
        suggRelevance = printSuggRankedDocs(testJson)
        if suggRelevance == True:
            util.saveSettings(Constants.LAST_SUGG_RELEVANCE_DIR, todayDate)
            util.logger.info("Sugg Relevance info just completed for =" + todayDate)
    return normalRelevance or suggRelevance
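# For reference, a minimal self-contained run of the same gensim pipeline used
# by Relevance(): TF-IDF over a toy training corpus, then a sparse similarity
# index queried with one test document. The corpus contents are invented.
from gensim import corpora, models, similarities

toy_train = [['stock', 'market', 'rally'],
             ['market', 'crash', 'fear'],
             ['football', 'cup', 'final']]
toy_test = ['stock', 'market', 'fear']

toy_dict = corpora.Dictionary(toy_train)
toy_corpus = [toy_dict.doc2bow(text) for text in toy_train]
toy_tfidf = models.TfidfModel(corpus=toy_corpus, id2word=toy_dict, normalize=True)
toy_index = similarities.SparseMatrixSimilarity(toy_tfidf[toy_corpus],
                                                num_features=len(toy_dict))

sims = toy_index[toy_tfidf[toy_dict.doc2bow(toy_test)]]  # one cosine sim per train doc
print(sum(sims))  # the scalar "relevance" score used above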
def Smoothness():
    todayDate = util.getYesterdayDateFolder()
    lastSmoothnessDate = util.loadSettings(Constants.LAST_SMOOTHNESS_DIR)
    lastSuggSmoothnessDate = util.loadSettings(Constants.LAST_SUGG_SMOOTHNESS_DIR)
    if lastSmoothnessDate:
        util.logger.info("Google Smoothness done last for =" + lastSmoothnessDate)
    else:
        util.logger.info("Google Smoothness done last for none")
    if lastSuggSmoothnessDate:
        util.logger.info("Sugg Google Smoothness done last for =" + lastSuggSmoothnessDate)
    else:
        util.logger.info("Sugg Google Smoothness done last for none")
    if todayDate == lastSmoothnessDate and todayDate == lastSuggSmoothnessDate:
        util.logger.info("Smoothness signal done for today =" + todayDate)
        return True
    trainFiles = util.findTrainingFiles()
    trainFiles = util.random_select(trainFiles)
    trainCorpus, usedTrainFiles = util.findCorpus(trainFiles)
    bm25obj = Bm25(trainCorpus)
    trainUniqueWords = [set(trainText) for trainText in trainCorpus]
    normalSmoothness = True
    if todayDate != lastSmoothnessDate:
        testFiles = util.findTestFiles()
        testCorpus, usedTestFiles = util.findCorpus(testFiles)
        testUniqueWords = [set(testText) for testText in testCorpus]
        smoothness = zeros((len(testCorpus), len(trainCorpus)))
        for testDoc in range(len(testCorpus)):
            uniqueTest = testUniqueWords[testDoc]
            for trainDoc in range(len(trainCorpus)):
                uniqueTrain = trainUniqueWords[trainDoc]
                # S\D: training words absent from the test doc;
                # D\S: test words absent from the training doc.
                SminusD = [word for word in trainCorpus[trainDoc] if word not in uniqueTest]
                DminusS = [word for word in testCorpus[testDoc] if word not in uniqueTrain]
                SminusDcontext = bm25obj.BM25Score(SminusD)
                DminusScontext = bm25obj.BM25Score(DminusS)
                smoothness[testDoc][trainDoc] = np.dot(SminusDcontext, DminusScontext)
        normalSmoothness = printNormalRankedDocs(smoothness, usedTestFiles)
        if normalSmoothness == True:
            util.saveSettings(Constants.LAST_SMOOTHNESS_DIR, todayDate)
            util.logger.info("Google Smoothness info just completed for =" + todayDate)
    suggSmoothness = True
    if todayDate != lastSuggSmoothnessDate:
        testFiles = util.findSuggTestFiles()
        testCorpus, usedTestFiles = util.findCorpus(testFiles)
        testUniqueWords = [set(testText) for testText in testCorpus]
        smoothness = zeros((len(testCorpus), len(trainCorpus)))
        for testDoc in range(len(testCorpus)):
            uniqueTest = testUniqueWords[testDoc]
            for trainDoc in range(len(trainCorpus)):
                uniqueTrain = trainUniqueWords[trainDoc]
                SminusD = [word for word in trainCorpus[trainDoc] if word not in uniqueTest]
                DminusS = [word for word in testCorpus[testDoc] if word not in uniqueTrain]
                SminusDcontext = bm25obj.BM25Score(SminusD)
                DminusScontext = bm25obj.BM25Score(DminusS)
                smoothness[testDoc][trainDoc] = np.dot(SminusDcontext, DminusScontext)
        suggSmoothness = printSuggRankedDocs(smoothness, usedTestFiles)
        if suggSmoothness == True:
            util.saveSettings(Constants.LAST_SUGG_SMOOTHNESS_DIR, todayDate)
            util.logger.info("Sugg Smoothness info just completed for =" + todayDate)
    return normalSmoothness or suggSmoothness
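# The repo's Bm25 class isn't shown in this section. Assuming BM25Score(words)
# returns one score per training document (a vector of length N), the
# smoothness of a test doc D against a training doc S is the dot product of
# the BM25 "context" vectors of S\D and D\S. A sketch using the rank-bm25
# package as a stand-in for the custom class (corpus contents invented):
import numpy as np
from rank_bm25 import BM25Okapi

toy_train = [['stock', 'market', 'rally'], ['market', 'crash'], ['football', 'final']]
test_doc = ['stock', 'crash', 'panic']

bm25 = BM25Okapi(toy_train)
S = toy_train[0]
SminusD = [w for w in S if w not in set(test_doc)]   # S \ D
DminusS = [w for w in test_doc if w not in set(S)]   # D \ S
# get_scores() returns one BM25 score per document in toy_train
print(np.dot(bm25.get_scores(SminusD), bm25.get_scores(DminusS)))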
                else:
                    result = True
                if result == True:
                    count = count + 1
                    util.logger.info('Boilered done for sugg_news =' + html_filename + str(count))
            except Exception:
                util.logger.error("Exception at boiler for google news : %s" % read_directory)
        else:
            pass
        finalJson['suggestGoogle'][Constants.GOOGLE].append(linkObj)
    result = writeBoilerJson(finalJson, downloadDate)
    if result == True:
        util.saveSettings(Constants.LAST_BOILER_SUGGGOOGLENEWS, downloadDate)
        util.logger.info("Sugg Google news boilered for =" + downloadDate +
                         " links=" + str(count) + " total =" + str(len(googleLinks)))
    else:
        util.logger.error("Sugg Google news failed to boiler for =" + downloadDate +
                          " links=" + str(count))
    return result


def BoilerNews(downloadDate):
    jsonData = readBoilerJson(downloadDate)
    if jsonData is None:
        return False
    result = False
    read_directory = os.path.join(Constants.ROOT_FOLDER,
            if htmlFile in jsonData:
                jsonData[htmlFile]["content"] = htmlText
            else:
                result = True
            if result == True:
                count = count + 1
            else:
                if htmlFile in jsonData:
                    del jsonData[htmlFile]
            print 'Boilered done for =' + html_filename + str(count)
        except Exception:
            util.logger.error("Exception at boiler for google news : %s" % read_directory)
    # percentage of files successfully boilered (integer division in Python 2)
    if ((count * 100) / len(onlyfiles)) > Constants.MIN_GOOGLELINKS_DAILY:
        result = writeBoilerJson(jsonData, downloadDate)
        if result == True:
            util.saveSettings(Constants.LAST_BOILER_GOOGLENEWS, downloadDate)
            util.logger.info("Google news boilered for =" + downloadDate + " links=" + str(count))
        else:
            util.logger.error("Google news failed to boiler for =" + downloadDate + " links=" + str(count))
    return result


def BoilerData(downloadDate):
    ret = False
    read_directory = os.path.join(Constants.ROOT_FOLDER, Constants.DATA_DIR, downloadDate)
    write_directory = os.path.join(Constants.ROOT_FOLDER, Constants.BOILER_DATA_DIR, downloadDate)
    if not os.path.exists(read_directory):
        util.logger.error("Boiler data can't be run because folder isn't present = " + downloadDate)
        return ret
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
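# The extraction step that produces htmlText lies outside this fragment. A
# common choice for this kind of pipeline is boilerpipe's ArticleExtractor;
# this is a sketch of that assumption, not the repo's confirmed dependency.
from boilerpipe.extract import Extractor

def extract_main_text(html):
    # Strips navigation, ads, and other boilerplate, keeping the article body.
    extractor = Extractor(extractor='ArticleExtractor', html=html)
    return extractor.getText()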
# Get all the main page links.
# Initialize for it
browser.get('http://www.news.google.com')
links = browser.find_elements_by_xpath('//a')
getLinksPerCategory(links, 'HomePage')
if len(downloadedLinks) > Constants.MIN_GOOGLELINKS_DAILY:
    linksToBeWritten = "\n".join(downloadedLinks)
    directory = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                             Constants.GOOGLE_LINKS_DIR, todayDate)
    if not os.path.exists(directory):
        os.makedirs(directory)
    result = util.writeToFile(linksToBeWritten,
                              os.path.join(directory, Constants.GOOGLE_LINKS_FILE))
    if result == True:
        util.saveSettings(Constants.LAST_GOOGLELINKS_DOWNLOAD, todayDate)
        util.logger.info("Google links downloaded for =" + todayDate)
        return result
util.logger.error("Google links not downloaded for =" + todayDate)
return result


def GoogleNews():
    downloadedLinks = []
    todayDate = util.getTodayDateFolder()
    lastNewsDownloaded = util.loadSettings(Constants.LAST_GOOGLENEWS_DOWNLOAD)
    lastLinksDownloaded = util.loadSettings(Constants.LAST_GOOGLELINKS_DOWNLOAD)
    googleLinksStatus = True
    googleNewsStatus = True
    # Check whether today's links have been extracted or not
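# Hypothetical sketch of getLinksPerCategory(), which is called above but not
# defined in this section: harvest unique absolute hrefs from the anchor
# elements into the downloadedLinks list used by the scraping code.
from selenium.common.exceptions import StaleElementReferenceException

def getLinksPerCategory(links, category):
    for anchor in links:
        try:
            href = anchor.get_attribute('href')
        except StaleElementReferenceException:
            # the page can re-render under Selenium, invalidating elements
            continue
        if href and href.startswith('http') and href not in downloadedLinks:
            downloadedLinks.append(href)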