Example #1
def singleObjectPatternFiltering(patterns,
                                 websiteLocation,
                                 supervisedFileLocation,
                                 preprocessType="None"):
    # Keep every (leftPattern, rightPattern) pair that reproduces the gold
    # single-object value on at least one supervised page of the website.
    output = []
    pages = getAllPagesInsideWebsite(websiteLocation)
    for pattern in patterns:
        (lp, rp) = pattern
        patternScore = 0
        for page in pages:
            exactPageLocation = page + "/page.html"
            contentList = readFileContentInList(page + "/" +
                                                supervisedFileLocation)
            singleObj = ""
            if len(contentList) == 1:
                singleObj = contentList[0]
            # Collapse runs of whitespace before comparing.
            goldContent = " ".join(singleObj.split())
            pageContent = readPlainHtmlPageContent(exactPageLocation)
            if preprocessType == "NUM":
                pageContent = replaceNumWordsInStr(pageContent)
            results = makeSingleObjectExtractions(pageContent, lp, rp)
            if goldContent in results:
                patternScore += 1
        if patternScore > 0:
            output.append((lp, rp))
    return output
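A minimal usage sketch, assuming the helper functions above are importable and each supervised page directory contains page.html plus the supervised file; the pattern pairs, directory path and file name below are illustrative placeholders, not values from the original project.

# Hypothetical inputs for illustration only.
candidatePatterns = [("<h1>", "</h1>"), ("<title>", "</title>")]
keptPatterns = singleObjectPatternFiltering(candidatePatterns,
                                            "data/website1",
                                            "singleObject.txt",
                                            preprocessType="NUM")
print(keptPatterns)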
Example #2
def singleObjectPatternFiltering(patterns,
                                 websiteLocation,
                                 supervisedFileLocation,
                                 artificialSeedSet,
                                 threshold=1000,
                                 preprocessType="None"):
    # For every pattern, record the extractions computed on each supervised
    # page next to the expected (gold) value and the artificial seeds for
    # that page, then let getPatternsFromStats decide which patterns to keep.
    stats = {}
    pages = getAllPagesInsideWebsite(websiteLocation)
    for patternIndex, pattern in enumerate(patterns, start=1):
        (lp, rp) = pattern
        resultsPerPattern = []
        for page in pages:
            exactPageLocation = page + "/page.html"
            contentList = readFileContentInList(page + "/" +
                                                supervisedFileLocation)
            singleObj = ""
            if len(contentList) == 1:
                singleObj = contentList[0]
            # Collapse runs of whitespace before comparing.
            goldContent = " ".join(singleObj.split())
            expected = [goldContent]
            pageContent = readPlainHtmlPageContent(exactPageLocation)
            computed = makeSingleObjectExtractions(pageContent, lp, rp,
                                                   threshold)
            resultsPerPattern.append(
                (computed, expected, artificialSeedSet[page]))
        stats[patternIndex] = (pattern, resultsPerPattern)
    return getPatternsFromStats(stats)
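A hedged usage sketch for this statistics-based variant; the website path, file name, candidate pattern and the per-page artificial seed lists are illustrative assumptions rather than values from the original project.

# Hypothetical inputs for illustration only; artificialSeeds maps each page
# directory to a list of seed extractions, matching the artificialSeedSet[page]
# lookup above.
sitePages = getAllPagesInsideWebsite("data/website1")
artificialSeeds = {page: [] for page in sitePages}
keptPatterns = singleObjectPatternFiltering([("<h1>", "</h1>")],
                                            "data/website1",
                                            "singleObject.txt",
                                            artificialSeeds,
                                            threshold=1000)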
Example #3
def clusterPatternFiltering(patterns,
                            websiteLocation,
                            supervisedFileLocation,
                            artificialClusters,
                            preprocessType="None"):
    # For every (lp, mp, rp) triplet pattern, compare the cluster elements it
    # extracts on each supervised page against the gold cluster elements and
    # the artificial cluster seeds, then let getPatternsFromStats pick the
    # patterns to keep.
    stats = {}
    pages = getAllPagesInsideWebsite(websiteLocation)
    for patternIndex, pattern in enumerate(patterns, start=1):
        (lp, mp, rp) = pattern
        resultsPerPattern = []
        for page in pages:
            exactPageLocation = page + "/page.html"
            contentList = readFileContentInList(page + "/" +
                                                supervisedFileLocation)
            # Collapse runs of whitespace in the gold cluster elements.
            expected = [" ".join(item.split()) for item in contentList]
            # Flatten the per-page artificial clusters into a single list.
            expectedArtificial = [
                " ".join(item.split())
                for sublist in artificialClusters[page]
                for item in sublist
            ]
            pageContent = readPlainHtmlPageContent(exactPageLocation)
            clusters = getClusterInsideLeftRightPattern(
                pageContent, lp, mp, rp)
            computed = []
            for cluster in clusters:
                computed.extend(getElementsOfCluster(cluster, mp))
            resultsPerPattern.append((computed, expected, expectedArtificial))
        stats[patternIndex] = (pattern, resultsPerPattern)
    return getPatternsFromStats(stats)
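A hedged usage sketch; the triplet pattern, paths, and the per-page artificial clusters (each value a list of clusters, each cluster a list of strings) are illustrative assumptions, not values from the original project.

# Hypothetical inputs for illustration only.
tripletPatterns = [("<ul>", "<li>", "</ul>")]
artificialClustersByPage = {
    page: [] for page in getAllPagesInsideWebsite("data/website1")
}
keptTriplets = clusterPatternFiltering(tripletPatterns,
                                       "data/website1",
                                       "clusterElements.txt",
                                       artificialClustersByPage)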
Example #4
from FileUtil import getAllPagesInsideWebsite, readPlainHtmlPageContent
from FileUtil import readFileContentInList
from utils import writeTripletPatternsAsCsv
from ClusterContextExtraction import getClusterContexts
from RegExpPatternFilteringModule import clusterPatternFiltering
from utils import appendPreprocessType, processNumInContext
from SeedExpansionModule import addArtificialClusterSeeds

# getWebsiteLocations, supervisedDataLocation and supervisedFileName are
# assumed to be defined or imported earlier in the full script (not shown
# in this excerpt).
websiteLocations = getWebsiteLocations(supervisedDataLocation)
print(websiteLocations)
for websiteLocation in websiteLocations:
    pages = getAllPagesInsideWebsite(websiteLocation)
    contexts = []
    artificialClusters = {}
    for page in pages:
        print(page)
        print(websiteLocation)
        exactPageLocation = page + "/page.html"
        clusterElements = readFileContentInList(page + "/" +
                                                supervisedFileName)
        pageContent = readPlainHtmlPageContent(exactPageLocation)
        contextsPerPage = getClusterContexts(pageContent, clusterElements)
        contexts.append(contextsPerPage)
        artificialClusterPerPage = addArtificialClusterSeeds(
            pageContent, clusterElements)
        artificialClusters[page] = artificialClusterPerPage
        realCluster = "\n".join(clusterElements)
        # print("Real Cluster is ")
        # print(realCluster)
        # artificialCluster = ["\n".join(cluster) for cluster in artificialClusterPerPage]
        # artificialCluster = "\n\n\n".join(artificialCluster)
        # print("Artificial Cluster is ")
        # print(artificialCluster)
        # print("Real cluster")
        # realCluster = "\n".join(clusterElements)
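    # --- Hedged continuation sketch, not part of the original excerpt ---
    # The excerpt only collects `contexts` and `artificialClusters`; a
    # plausible next step inside this per-website loop would filter triplet
    # patterns with the imported clusterPatternFiltering. `tripletPatterns`
    # is a placeholder for whatever pattern-learning step the full script
    # applies to `contexts`.
    tripletPatterns = []
    filteredTriplets = clusterPatternFiltering(tripletPatterns,
                                               websiteLocation,
                                               supervisedFileName,
                                               artificialClusters)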
Example #5
from FileUtil import getAllPagesInsideWebsite, readPlainHtmlPageContent
from FileUtil import readFileContentInList
from utils import getSingleObjectContexts, writePairPatternsAsCsv
from SingleObjectPatternsLearningUtil import learnPatterns
from utils import appendPreprocessType
from utils import processNumInContext

# getWebsiteLocations, supervisedDataLocation and supervisedFileName are
# assumed to be defined or imported earlier in the full script (not shown
# in this excerpt).
websiteLocations = getWebsiteLocations(supervisedDataLocation)
print(websiteLocations)
for websiteLocation in websiteLocations:
    pages = getAllPagesInsideWebsite(websiteLocation)
    singleObjectContexts = []
    singleObjList = []
    for page in pages:
        exactPageLocation = page + "/page.html"
        contentList = readFileContentInList(page + "/" + supervisedFileName)
        singleObj = ""
        if len(contentList) == 1:
            singleObj = contentList[0]
        # Collapse multiple spaces in the product title.
        singleObj = " ".join(singleObj.split())
        pageContent = readPlainHtmlPageContent(exactPageLocation)
        contextsPerPage = getSingleObjectContexts(pageContent, singleObj)
        if len(contextsPerPage) > 0:
            singleObjectContexts.append(contextsPerPage)
        singleObjList.append(singleObj)
    print("Single object contexts are:")
    print(singleObjectContexts)
    patterns = learnPatterns(singleObjectContexts)
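    # --- Hedged continuation sketch, not part of the original excerpt ---
    # A natural next step is to keep only the learned (lp, rp) pairs that
    # reproduce the gold value on the supervised pages, using the
    # singleObjectPatternFiltering routine from Example #1.
    keptPatterns = singleObjectPatternFiltering(patterns,
                                                websiteLocation,
                                                supervisedFileName)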