예제 #1
0
def writeExportFile(strSQL, strFileName):
    """Export the first column of a query result as a ``<watched>`` XML file.

    Executes ``strSQL`` against the database named by ``videoDB`` (defined
    outside this function) and writes a ``<watched>`` document containing one
    ``<file>`` element per returned row; each element's text is the row's
    first column.

    Args:
        strSQL: SQL statement to run; only column 0 of each row is used.
        strFileName: Path of the XML file to create or overwrite.
    """
    dbCon = sqlite.connect(videoDB)
    try:
        dbCur = dbCon.cursor()
        dbCur.execute(strSQL)

        xmlObj = ET.Element("watched")
        for row in dbCur:
            ET.SubElement(xmlObj, "file").text = row[0]
    finally:
        # Release the database handle even when the query or iteration fails.
        dbCon.close()

    # Serialise before touching the target so a failure here cannot leave a
    # truncated file behind.
    xmlBytes = ET.tostring(xmlObj)

    # ET.tostring() yields an encoded byte string, so the file is opened in
    # binary mode; a text-mode handle rejects bytes on Python 3.
    with open(strFileName, 'wb') as episodeFileObj:
        episodeFileObj.write(xmlBytes)
예제 #2
0
    def Check_Gmp_xml(self, filepath):
        """Rebuild the GRM project XML at ``filepath`` with fallbacks filled in.

        The file is parsed and a fresh ``GRMProject`` tree is built from it.
        For every element name listed by the ``Set_XML_element*`` helpers the
        new tree holds the text found in the source file, or a fallback value
        when that text is missing or empty.  The new tree overwrites
        ``filepath`` and the file is then read and written once more through
        ``GRMCore.GRMProject`` (``ReadXml`` / ``WriteXml``).

        Side effects: sets ``self.ProjectFile`` and the ``GRM._*Count``
        attributes, each to the number of source elements copied for the
        corresponding repeating section.

        Args:
            filepath: Path of the project file; it is read and overwritten.
        """
        self.ProjectFile = filepath

        # Fill the self.XML_element* name lists consumed below.
        self.Set_XML_element()
        self.Set_XML_element_SubWatershed()
        self.Set_XML_element_WatchPoints()
        self.Set_XML_element_FlowControlGrid()
        self.Set_XML_element_GreenAmptParameter()
        self.Set_XML_element_SoilDepth()
        self.Set_XML_element_LandCover()

        ns = "{http://tempuri.org/GRMProject.xsd}"

        # Fallbacks used when a ProjectSettings value is missing or empty.
        project_defaults = {
            "GRMSimulationType": "SingleEvent",
            "LandCoverDataType": "File",
            "SoilTextureDataType": "File",
            "SoilDepthDataType": "File",
            "RainfallDataType": "TextFileASCgrid",
            "FlowDirectionType": "StartsFromE_TauDEM",
            "IsParallel": "true",
            "MaxDegreeOfParallelism": "-1",
            "SimulStartingTime": "0",
            "IsFixedTimeStep": "false",
            # The key used to carry embedded quotes ("'SimulateInfiltration'")
            # and therefore never matched an element name.
            "SimulateInfiltration": "true",
            "SimulateSubsurfaceFlow": "true",
            "SimulateBaseFlow": "true",
            "SimulateFlowControl": "true",
            "CrossSectionType": "CSSingle",
            # NOTE(review): "CWGemeration" looks like a misspelling of
            # "CWGeneration"; kept unchanged until confirmed against GRMCore.
            "SingleCSChannelWidthType": "CWGemeration",
            "BankSideSlopeRight": "1.5",
            "BankSideSlopeLeft": "1.5",
            "MakeIMGFile": "false",
            "MakeASCFile": "false",
            "MakeSoilSaturationDistFile": "true",
            "MakeRfDistFile": "true",
            "MakeRFaccDistFile": "true",
            "MakeFlowDistFile": "true",
            "PrintOption": "All",
            "WriteLog": "false",
        }

        # Fallbacks for each SubWatershedSettings element.
        sub_watershed_defaults = {
            "IniSaturation": "1",
            "MinSlopeOF": "0.0001",
            "MinSlopeChBed": "0.0001",
            # NOTE(review): "00" is kept as found; confirm the intended value.
            "MinChBaseWidth": "00",
            "ChRoughness": "0.045",
            "DryStreamOrder": "0",
            "IniFlow": "0",
            "UnsaturatedKType": "Linear",
            "CoefUnsaturatedK": "0.2",
            "CalCoefLCRoughness": "1",
            "CalCoefPorosity": "1",
            "CalCoefWFSuctionHead": "1",
            "CalCoefHydraulicK": "1",
            "CalCoefSoilDepth": "1",
            "UserSet": "false",
        }

        # The remaining sections only normalise a missing value to "".
        watch_point_defaults = dict.fromkeys(("Name", "ColX", "RowY"), "")
        flow_control_defaults = dict.fromkeys(
            ("ColX", "RowY", "Name", "ControlType", "DT", "FlowDataFile",
             "IniStorage", "MaxStorage", "MaxStorageR", "ROType", "ROConstQ",
             "ROConstQDuration"), "")
        green_ampt_defaults = dict.fromkeys(
            ("GridValue", "USERSoil", "GRMCode", "GRMTextureE", "GRMTextureK",
             "Porosity", "EffectivePorosity", "WFSoilSuctionHead",
             "HydraulicConductivity"), "")
        soil_depth_defaults = dict.fromkeys(
            ("GridValue", "UserDepthClass", "GRMCode", "SoilDepthClassE",
             "SoilDepthClassK", "SoilDeptht"), "")
        land_cover_defaults = dict.fromkeys(
            ("GridValue", "UserLandCover", "GRMCode", "GRMLandCoverE",
             "GRMLandCoverK", "RoughnessCoefficient", "ImperviousRatio"), "")

        def value_for(source, lookup, name, defaults):
            """Return child ``lookup`` text of ``source`` or ``name``'s fallback."""
            value = None if source is None else source.findtext(ns + lookup)
            if value is None or value == "":
                # Names without a table entry keep whatever was read.
                value = defaults.get(name, value)
            return value

        def output_tag(name):
            """Map the "SoilDeptht" placeholder to the tag that is written."""
            # This used to be done by re-reading the written file as text and
            # replacing the placeholder, which decoded the UTF-8 file with the
            # platform default encoding.  Element text is no longer touched.
            return name.replace("SoilDeptht", "SoilDepth")

        # Parse the existing project file.
        doc = ET.parse(self.ProjectFile)
        root = doc.getroot()

        GRMProject = ET.Element("GRMProject")
        GRMProject.set("xmlns", "http://tempuri.org/GRMProject.xsd")

        # ProjectSettings is written exactly once.  When the source holds
        # several such elements the last one wins; when it holds none the
        # fallbacks are used (previously the value was unbound or stale).
        ProjectSettings = ET.SubElement(GRMProject, "ProjectSettings")
        settings_nodes = root.findall(ns + "ProjectSettings")
        settings_source = settings_nodes[-1] if settings_nodes else None
        for name in self.XML_element:
            ET.SubElement(ProjectSettings, output_tag(name)).text = value_for(
                settings_source, name, name, project_defaults)

        # Repeating sections: (tag, element names, fallbacks, GRM counter).
        sections = (
            ("SubWatershedSettings", self.XML_element_SubWatershed,
             sub_watershed_defaults, "_SubWatershedCount"),
            ("WatchPoints", self.XML_element_WatchPoints,
             watch_point_defaults, "_WatchPointCount"),
            ("FlowControlGrid", self.XML_element_FlowControlGrid,
             flow_control_defaults, "_FlowControlCount"),
            ("GreenAmptParameter", self.XML_element_GreenAmptParameter,
             green_ampt_defaults, "_GreenAmptCount"),
            ("SoilDepth", self.XML_element_SoilDepth,
             soil_depth_defaults, "_SoilDepthCount"),
            ("LandCover", self.XML_element_LandCover,
             land_cover_defaults, "_LandCoverCount"),
        )
        for section_tag, names, defaults, counter in sections:
            setattr(GRM, counter, 0)
            for element in root.findall(ns + section_tag):
                target = ET.SubElement(GRMProject, section_tag)
                for name in names:
                    # Inside a SoilDepth section the placeholder stands for
                    # the child that is itself called "SoilDepth".
                    if section_tag == "SoilDepth" and name == "SoilDeptht":
                        lookup = "SoilDepth"
                    else:
                        lookup = name
                    ET.SubElement(target, output_tag(name)).text = value_for(
                        element, lookup, name, defaults)
                setattr(GRM, counter, getattr(GRM, counter) + 1)

        ET.ElementTree(GRMProject).write(self.ProjectFile,
                                         encoding="utf-8",
                                         xml_declaration=True)

        # Read and rewrite the file through GRMCore; presumably this
        # normalises the layout, replacing the earlier manual indent pass --
        # TODO confirm.
        ds = GRMCore.GRMProject()
        try:
            ds.ReadXml(self.ProjectFile)
            ds.WriteXml(self.ProjectFile)
        finally:
            # Dispose even when the read or write raises.
            ds.Dispose()
예제 #3
0
    else:
        description[0:140]
        mDesc.append(description)

#Create an rss xml file
#rss = PyRSS2Gen.RSS2(
#title = "Python Project",
#link = "http://rockhopper.monmouth.edu/cs/jchung/cs498gpl/python_project",
#description = "Get the title, link, and description of each article and write them to a rss feed",
#items = []
#)

#Append all the elements into the items array
#for i in range(len(mTitles)):
#    a = PyRSS2Gen.RSSItem(title = mTitles[i], link = mLinks[i], description= mDesc[i])
#    rss.items.append(a)

#rss.write_xml(open('cssenews.rss.xml','w'))

# Build a <channel> element with one <item> per collected article, then
# serialise it to cssenews.rss.xml.
root = ET.Element("channel")

for i, headline in enumerate(mTitles):
    # mLinks and mDesc are indexed in step with mTitles.
    item = ET.SubElement(root, "item")

    title = ET.SubElement(item, "title")
    title.text = headline

    link = ET.SubElement(item, "link")
    link.text = mLinks[i]

    description = ET.SubElement(item, "description")
    description.text = mDesc[i]

tree = ET.ElementTree(root)
tree.write("cssenews.rss.xml")
예제 #4
0
    def searchEpisodes(self):
        """Fetch and store the search results for every show in the input tree.

        For each ``globals.EL_SHOW`` element the search engine is queried page
        by page and the parsed episode list of every page is appended to the
        show element.  Paging stops when all result pages were parsed or when
        ``checkExistingEpisodes`` reports that the oldest needed episode
        (``globals.EL_OLDEST``) was reached; in that case the final list is
        trimmed with ``stripOlderEpisodes`` before it is stored.

        An empty page count now ends the loop and triggers the "no match"
        warning; previously ``int('')`` raised before the warning was reached.
        """

        for show in self.inputtree.getroot().getiterator(globals.EL_SHOW):

            showname = show.find(globals.EL_SHOWNAME).text
            logprefix = 'Show "' + showname + '"'

            # There is always at least one result page, so the page count
            # element starts at 1.
            ET.SubElement(show, globals.EL_NUMPAGES).text = '1'
            numpages = show.find(globals.EL_NUMPAGES)

            # Prepare the query for the search engine.
            globals.SEARCH_QUERY[globals.SEARCHFILTERKEY] = show.find(
                globals.EL_SEARCHFILTER).text.encode(globals.ENC_UTF)
            globals.SEARCH_QUERY[globals.SEARCHSTRKEY] = show.find(
                globals.EL_SEARCHSTR).text.encode(globals.ENC_UTF)

            # Id of the oldest needed episode; empty when nothing is on disk.
            oldest = show.find(globals.EL_OLDEST).text

            # Let the log know what is about to happen.
            if oldest:
                self.writeLog(
                    logprefix + ': searching for episodes newer than ' +
                    oldest, 'message')
            else:
                self.writeLog(
                    logprefix + ': searching for all new episodes', 'message')

            page = 1
            foundOldestNeeded = 0

            # Main search loop.  "or 0" turns an empty page count into zero
            # pages instead of letting int() raise on it.
            while page <= int(numpages.text or 0) and not foundOldestNeeded:

                self.writeLog(
                    logprefix + ': Parsing search results page ' + str(page),
                    'message')

                # The page number is the last item added to the query.
                globals.SEARCH_QUERY[globals.PAGEKEY] = str(page)

                req = urllib2.Request(globals.SEARCH_ENGINE_URL,
                                      urllib.urlencode(globals.SEARCH_QUERY),
                                      globals.SEARCH_HEADERS)

                html = self.querySearchEngine(req)

                # Keep a local copy of the result page when in download mode.
                if globals.DEV_MODE == 'download':
                    self.saveLocalPage(html, self.localPageName(req))

                # The first page tells how many pages there are in total.
                if page == 1:
                    numpages.text = self.parseEpisodeSearch_numpages(html)

                episodelist = self.parseEpisodeSearch(html)

                # With episodes already on disk, check whether this page
                # reached the oldest one that is still needed.
                if oldest:
                    foundOldestNeeded = self.checkExistingEpisodes(
                        lastexisting=oldest, list=episodelist)

                    # Drop everything older than that episode.
                    if foundOldestNeeded:
                        episodelist = self.stripOlderEpisodes(
                            lastexisting=oldest, list=episodelist)

                show.append(episodelist)
                page += 1

            # No page count means the search produced no results.
            if not numpages.text:
                self.writeLog(
                    logprefix + ': search results have no match', 'warning')
예제 #5
0
    def parseEpisodeSearch(self, html):
        """Parse one search result page into an episode list element.

        Args:
            html: Markup of a search result page.

        Returns:
            A ``globals.EL_EPISODELIST`` element holding one
            ``globals.EL_EPISODEDETAILS`` child per
            ``<div class="conteudo-texto">`` found, each carrying title, url,
            id, description and, where present, duration and date
            sub-elements.

        Raises:
            IndexError: If an episode url contains no six-digit id, or if the
                page has more duration/date tags than episodes.
        """

        # Raw string so that \S and \d reach the regex engine untouched;
        # compiled once instead of once per episode.
        idpattern = re.compile(r'http\S*GIM(\d{6})\S*html')

        # SoupStrainer restricts parsing to the tags of interest.  Title,
        # description and url live together in <div class=conteudo-texto>.
        trainer = SoupStrainer('div', {'class': 'conteudo-texto'})
        soup = BeautifulSoup(html, parseOnlyThese=trainer)

        # Element holding the list of episodes found.
        episodeslist = ET.Element(globals.EL_EPISODELIST)

        for tag in soup.contents:

            episodedetails = ET.SubElement(episodeslist,
                                           globals.EL_EPISODEDETAILS)
            anchor = tag.h2.a
            url = anchor['href']

            ET.SubElement(episodedetails,
                          globals.EL_EPISODETITLE).text = anchor['title']
            ET.SubElement(episodedetails, globals.EL_EPISODEURL).text = url

            # The episode id is the six-digit group inside the url.
            ET.SubElement(episodedetails,
                          globals.EL_EPISODEID).text = idpattern.findall(url)[0]

            # The <p> tag may contain sub tags, so every piece of its
            # contents is concatenated.
            description = ''.join(piece.string for piece in tag.p.contents)
            ET.SubElement(episodedetails,
                          globals.EL_EPISODEDESCRIPTION).text = description

        # Durations (<span class="tempo">) and dates (<td class="coluna-data">)
        # sit outside the divs; the n-th tag of each kind is attached to the
        # n-th episode collected above.
        details = episodeslist.getiterator(globals.EL_EPISODEDETAILS)
        extras = (
            ('span', 'tempo', globals.EL_EPISODEDURATION),
            ('td', 'coluna-data', globals.EL_EPISODEDATE),
        )
        for htmltag, cssclass, fieldname in extras:
            trainer = SoupStrainer(htmltag, {'class': cssclass})
            soup = BeautifulSoup(html, parseOnlyThese=trainer)
            for index, tag in enumerate(soup):
                ET.SubElement(details[index], fieldname).text = tag.string

        return episodeslist
예제 #6
0
    def checkLastDownloads(self):
        """Record, per show, the newest episode id already present on disk.

        For every download element of a show (``globals.ELS_DOWNLOAD``) the
        referenced directory is listed and the ids matched by the element's
        ``globals.FILE_MATCHING`` pattern are collected.  The highest id is
        stored in a ``<download element>_last`` sub-element of the show.  The
        lowest of these per-directory newest ids is stored as
        ``globals.EL_OLDEST`` (empty text when no episode was found).

        ``<download element>_last`` used to receive the lowest id of the
        directory, contradicting both the log message and the value collected
        for ``globals.EL_OLDEST``; it now receives the highest one.
        """

        for show in self.inputtree.getroot().getiterator(globals.EL_SHOW):

            newest_episodes_found = []

            for download_element in globals.ELS_DOWNLOAD:

                # Only download elements present for this show are handled.
                download_node = show.find(download_element)
                if download_node is None:
                    continue

                showname = show.find(globals.EL_SHOWNAME).text
                downloaddir = download_node.text

                filenames = os.listdir(downloaddir)
                reg = re.compile(globals.FILE_MATCHING[download_element],
                                 re.IGNORECASE)

                # findall() returns a list; its first entry is the id.
                idlist = []
                for filename in filenames:
                    match = reg.findall(filename)
                    if match:
                        idlist.append(match[0])

                if not idlist:
                    self.writeLog(
                        'Show "' + showname + '"' +
                        ': no existing episode found in \"' + downloaddir +
                        '\"', 'message')
                    continue

                # Ids compare in ascending order, so the maximum is the
                # latest episode.
                latest = max(idlist)
                self.writeLog(
                    'Show ' + showname +
                    ': id of latest existing episode in ' +
                    downloaddir + ' is ' + str(latest), 'message')
                ET.SubElement(show,
                              download_element + '_last').text = str(latest)
                newest_episodes_found.append(latest)

            # Store the oldest of the newest existing episodes found on disk.
            # HERE: a change is needed. For each download, a last existing
            # episode should be stored, instead of the oldest newest.
            if newest_episodes_found:
                ET.SubElement(show, globals.EL_OLDEST).text = str(
                    min(newest_episodes_found))
            else:
                ET.SubElement(show, globals.EL_OLDEST).text = ''