Esempio n. 1
0
    def harvest(self):
        """
        @summary: does the harvesting for the given type and url

        Fetches the documents offered by the configured transport ("FTP",
        "CSW" or "HTTP") and hands each one to createNewMetadata(), adding
        up the values it returns. Any other transport value harvests
        nothing.

        @return: the accumulated warning total when it is non-zero, -1 when
                 it is zero, or 1 when harvesting failed. In the failure
                 case the traceback is printed and written to the parent's
                 log via addLog(). NOTE: a warning total of exactly 1 is
                 indistinguishable from the failure code.
        """
        try:
            warnings = 0
            self.lastUpdateDate = time.time()
            transport = self.getTransport()

            if transport == "FTP":
                ftp = FTPTransport(self.getUrl(), self.getUsername(),
                                   self.getPassword())
                if ftp.message:
                    # Raising a plain string is rejected by Python 2.6+ with
                    # a TypeError that hides the transport's own message, so
                    # wrap the text in a real exception.
                    raise Exception('%s\n%s' % (ftp.message, self.getUrl()))
                files = ftp.getFiles()
                for key in files.keys():
                    entry = files[key]
                    # Only entries whose value ends in "xml" are harvested;
                    # everything else (zip archives included) is skipped.
                    if entry.endswith("xml"):
                        warnings += self.createNewMetadata(key, entry)

            elif transport == "CSW":
                csw = CSWTransport(self.getUrl(), "")
                if csw.message:
                    raise Exception('%s\n%s' % (csw.message, self.getUrl()))
                files = csw.getRecords()
                for key in files.keys():
                    warnings += self.createNewMetadata(key, files[key])

            elif transport == "HTTP":
                http = HTTPTransport(self.getUrl())
                if http.message:
                    raise Exception('%s\n%s' % (http.message, self.getUrl()))

                # Single-argument print(...) behaves the same as the
                # Python 2 print statement.
                print('Creating Metadata documents.')
                files = http.files
                for key in files.keys():
                    warnings += self.createNewMetadata(key, files[key])

            return warnings if warnings else -1
        except Exception:
            # Top-level boundary: record the failure instead of propagating
            # it. KeyboardInterrupt/SystemExit are deliberately not caught.
            trace = traceback.format_exc()
            print(trace)
            self.aq_parent.addLog("Error during harvesting from : " +
                                  self.getUrl(),
                                  trace,
                                  type="Error")
            return 1
Esempio n. 2
0
    def harvest(self):
        """
        @summary: does the harvesting for the given type and url

        Fetches the documents offered by the configured transport ("FTP",
        "CSW" or "HTTP") and hands each one to createNewMetadata(), adding
        up the values it returns. Any other transport value harvests
        nothing.

        @return: the accumulated warning total when it is non-zero, -1 when
                 it is zero, or 1 when harvesting failed. In the failure
                 case the traceback is printed and written to the parent's
                 log via addLog(). NOTE: a warning total of exactly 1 is
                 indistinguishable from the failure code.
        """
        try:
            warnings = 0
            self.lastUpdateDate = time.time()
            transport = self.getTransport()

            if transport == "FTP":
                ftp = FTPTransport(self.getUrl(), self.getUsername(), self.getPassword())
                if ftp.message:
                    # Raising a plain string is rejected by Python 2.6+ with
                    # a TypeError that hides the transport's own message, so
                    # wrap the text in a real exception.
                    raise Exception('%s\n%s' % (ftp.message, self.getUrl()))
                files = ftp.getFiles()
                for key in files.keys():
                    entry = files[key]
                    # Only entries whose value ends in "xml" are harvested;
                    # everything else (zip archives included) is skipped.
                    if entry.endswith("xml"):
                        warnings += self.createNewMetadata(key, entry)

            elif transport == "CSW":
                csw = CSWTransport(self.getUrl(), "")
                if csw.message:
                    raise Exception('%s\n%s' % (csw.message, self.getUrl()))
                files = csw.getRecords()
                for key in files.keys():
                    warnings += self.createNewMetadata(key, files[key])

            elif transport == "HTTP":
                http = HTTPTransport(self.getUrl())
                if http.message:
                    raise Exception('%s\n%s' % (http.message, self.getUrl()))

                # Single-argument print(...) behaves the same as the
                # Python 2 print statement.
                print('Creating Metadata documents.')
                files = http.files
                for key in files.keys():
                    warnings += self.createNewMetadata(key, files[key])

            return warnings if warnings else -1
        except Exception:
            # Top-level boundary: record the failure instead of propagating
            # it. KeyboardInterrupt/SystemExit are deliberately not caught.
            trace = traceback.format_exc()
            print(trace)
            self.aq_parent.addLog("Error during harvesting from : " + self.getUrl(), trace, type="Error")
            return 1
Esempio n. 3
0
    def harvest(self):
        """
        @summary: does the harvesting for the given type and url

        Fetches the documents offered by the configured transport ("FTP",
        "CSW" or "HTTP"), passes each one to createNewMetadata() and mails
        a progress report after every 10 documents plus a final report at
        the end. The transaction is committed before each progress report
        is sent.

        @return: None when the harvest ran to completion, 1 when it failed;
                 in the failure case the traceback is printed and written
                 to the parent's log via addLog().
        """
        try:
            harvestStart = time.strftime("%H:%M %y-%m-%d")
            # Single-argument print(...) behaves the same as the Python 2
            # print statement.
            print("Harvesting %s" % (self.getUrl(),))
            mTo = self.getEmail()
            mFrom = "*****@*****.**"
            mSubjTemplate = 'Harvester "%s" progress report (from %s to %s) '
            messageTemplate = ("From: CoGIS Portal <%s>\n"
                               "Subject: %s\n"
                               "To: <%s>\n"
                               "Content-Type: text/plain;\n\n")
            self.lastUpdateDate = time.time()

            # Raising a plain string is rejected by Python 2.6+ with a
            # TypeError that hides the transport's own message, so every
            # failure is wrapped in a real exception.
            transport = self.getTransport()
            if transport == "FTP":
                ftp = FTPTransport(self.getUrl(), self.getUsername(), self.getPassword())
                if ftp.message:
                    raise Exception("%s\n%s" % (ftp.message, self.getUrl()))
                files = ftp.getFiles()
            elif transport == "CSW":
                csw = CSWTransport(self.getUrl(), "")
                if csw.message:
                    raise Exception("%s\n%s" % (csw.message, self.getUrl()))
                files = csw.getRecords()
            elif transport == "HTTP":
                http = HTTPTransport(self.getUrl())
                if http.message:
                    raise Exception("%s\n%s" % (http.message, self.getUrl()))
                files = http.files
            else:
                # Without this branch `files` would be unbound and the log
                # would only show an unrelated NameError.
                raise Exception("Unsupported transport %r\n%s" % (transport, self.getUrl()))

            total = len(files)
            count = 0
            elapsedTotal = 0.0
            warnings = []
            processed = []

            for key in files.keys():
                start = time.time()
                count += 1
                newwarnings, url = self.createNewMetadata(key, files[key])
                print("newwarnings %s" % (newwarnings,))
                warnings += newwarnings
                processed.append(url)
                timeTaken = time.time() - start
                elapsedTotal += timeTaken
                # Running average over every document processed so far.
                avg = elapsedTotal / count

                print(">> %s of %s and %s seconds. AVG: %s" % (count, total, timeTaken, avg))
                if count % 10 == 0:
                    # count is a multiple of 10 here, so this batch covers
                    # documents count - 9 .. count.
                    mSubj = mSubjTemplate % (self.title_or_id(), count - 9, count)
                    mSubj += "of %s documents." % total
                    message = messageTemplate % (mFrom, mSubj, mTo)
                    if warnings:
                        # Trailing newline keeps the next section from being
                        # glued onto the warnings line.
                        message += (
                            "The following errors occurred, please se the Harvester logs for further details: \n"
                            + str(warnings)
                            + "\n"
                        )
                    else:
                        message += "The Harvest didn't run into any problems yet.: %s \n" % self.absolute_url()
                    message += "Processed the following urls:\n%s\n" % "\n\t".join(processed)
                    message += "\nThe average processing time was: %s\n" % (avg,)
                    # Each progress report only lists what happened since
                    # the previous one.
                    processed = []
                    warnings = []
                    transaction.commit()
                    self.sendemail(mSubj, self.absolute_url(), message)

            harvestEnd = time.strftime("%H:%M %y-%m-%d")

            mSubj = mSubjTemplate % (self.title_or_id(), harvestStart, harvestEnd)
            message = messageTemplate % (mFrom, mSubj, mTo)

            # NOTE: `warnings` was reset after every progress report, so the
            # final report only reflects documents processed since the last
            # one.
            if warnings:
                message += (
                    "The following errors occurred, please se the Harvester logs for further details: \n"
                    + "\n".join(warnings)
                )
            else:
                message += "The Harvest was successful: %s \n" % self.absolute_url()

            self.sendemail(mSubj, self.absolute_url(), message)
        except Exception:
            # Top-level boundary: record the failure instead of propagating
            # it. KeyboardInterrupt/SystemExit are deliberately not caught.
            trace = traceback.format_exc()
            print(trace)
            self.aq_parent.addLog("Error during harvesting from : " + self.getUrl(), trace, type="Error")
            return 1
Esempio n. 4
0
    def harvest(self):
        """
        @summary: does the harvesting for the given type and url

        Fetches the documents offered by the configured transport ("FTP",
        "CSW" or "HTTP"), passes each one to createNewMetadata() and mails
        a progress report after every 10 documents plus a final report at
        the end. The transaction is committed before each progress report
        is sent.

        @return: None when the harvest ran to completion, 1 when it failed;
                 in the failure case the traceback is printed and written
                 to the parent's log via addLog().
        """
        try:
            harvestStart = time.strftime("%H:%M %y-%m-%d")
            # Single-argument print(...) behaves the same as the Python 2
            # print statement.
            print('Harvesting %s' % (self.getUrl(),))
            mTo = self.getEmail()
            mFrom = '*****@*****.**'
            mSubjTemplate = 'Harvester "%s" progress report (from %s to %s) '
            messageTemplate = ("From: CoGIS Portal <%s>\n"
                               "Subject: %s\n"
                               "To: <%s>\n"
                               "Content-Type: text/plain;\n\n")
            self.lastUpdateDate = time.time()

            # Raising a plain string is rejected by Python 2.6+ with a
            # TypeError that hides the transport's own message, so every
            # failure is wrapped in a real exception.
            transport = self.getTransport()
            if transport == "FTP":
                ftp = FTPTransport(self.getUrl(), self.getUsername(),
                                   self.getPassword())
                if ftp.message:
                    raise Exception('%s\n%s' % (ftp.message, self.getUrl()))
                files = ftp.getFiles()
            elif transport == "CSW":
                csw = CSWTransport(self.getUrl(), "")
                if csw.message:
                    raise Exception('%s\n%s' % (csw.message, self.getUrl()))
                files = csw.getRecords()
            elif transport == "HTTP":
                http = HTTPTransport(self.getUrl())
                if http.message:
                    raise Exception('%s\n%s' % (http.message, self.getUrl()))
                files = http.files
            else:
                # Without this branch `files` would be unbound and the log
                # would only show an unrelated NameError.
                raise Exception('Unsupported transport %r\n%s' %
                                (transport, self.getUrl()))

            total = len(files)
            count = 0
            elapsedTotal = 0.0
            warnings = []
            processed = []

            for key in files.keys():
                start = time.time()
                count += 1
                newwarnings, url = self.createNewMetadata(key, files[key])
                print('newwarnings %s' % (newwarnings,))
                warnings += newwarnings
                processed.append(url)
                timeTaken = time.time() - start
                elapsedTotal += timeTaken
                # Running average over every document processed so far.
                avg = elapsedTotal / count

                print('>> %s of %s and %s seconds. AVG: %s' %
                      (count, total, timeTaken, avg))
                if count % 10 == 0:
                    # count is a multiple of 10 here, so this batch covers
                    # documents count - 9 .. count.
                    mSubj = mSubjTemplate % (self.title_or_id(), count - 9,
                                             count)
                    mSubj += 'of %s documents.' % total
                    message = messageTemplate % (mFrom, mSubj, mTo)
                    if warnings:
                        # Trailing newline keeps the next section from being
                        # glued onto the warnings line.
                        message += (
                            "The following errors occurred, please se the Harvester logs for further details: \n"
                            + str(warnings) + "\n")
                    else:
                        message += "The Harvest didn't run into any problems yet.: %s \n" % self.absolute_url()
                    message += "Processed the following urls:\n%s\n" % '\n\t'.join(
                        processed)
                    message += "\nThe average processing time was: %s\n" % (
                        avg, )
                    # Each progress report only lists what happened since
                    # the previous one.
                    processed = []
                    warnings = []
                    transaction.commit()
                    self.sendemail(mSubj, self.absolute_url(), message)

            harvestEnd = time.strftime("%H:%M %y-%m-%d")

            mSubj = mSubjTemplate % (self.title_or_id(), harvestStart,
                                     harvestEnd)
            message = messageTemplate % (mFrom, mSubj, mTo)

            # NOTE: `warnings` was reset after every progress report, so the
            # final report only reflects documents processed since the last
            # one.
            if warnings:
                message += (
                    "The following errors occurred, please se the Harvester logs for further details: \n"
                    + '\n'.join(warnings))
            else:
                message += "The Harvest was successful: %s \n" % self.absolute_url()

            self.sendemail(mSubj, self.absolute_url(), message)
        except Exception:
            # Top-level boundary: record the failure instead of propagating
            # it. KeyboardInterrupt/SystemExit are deliberately not caught.
            trace = traceback.format_exc()
            print(trace)
            self.aq_parent.addLog("Error during harvesting from : " +
                                  self.getUrl(),
                                  trace,
                                  type="Error")
            return 1