def harvest(self):
    """
    @summary: does the harvesting for the given type and url

    Asks the transport selected by getTransport() ("FTP", "CSW" or "HTTP")
    for its documents and passes each (key, value) pair of the returned
    mapping to createNewMetadata(), summing the numbers it returns.
    For FTP only entries whose value ends in "xml" are imported.

    @return: the summed warning count when it is non-zero;
             -1 when it is zero (also the result for an unrecognised
             transport, which imports nothing);
             1 when an exception aborted the harvest - the traceback is
             printed and written to the parent's log.
             NOTE: a result of 1 is therefore ambiguous (one warning or
             a failure); callers have to consult the log to tell.
    """
    try:
        warnings = 0
        self.lastUpdateDate = time.time()
        transport = self.getTransport()

        if transport == "FTP":
            ftp = FTPTransport(self.getUrl(), self.getUsername(), self.getPassword())
            if ftp.message:
                # A real exception object: raising a plain string is a
                # TypeError on Python >= 2.6 and would hide the message.
                raise Exception("%s\n%s" % (ftp.message, self.getUrl()))
            # NOTE: zip archives are not unpacked; only plain xml entries
            # are imported.
            for document, ref in ftp.getFiles().items():
                if ref[-3:] == "xml":
                    warnings += self.createNewMetadata(document, ref)
        elif transport == "CSW":
            csw = CSWTransport(self.getUrl(), "")
            if csw.message:
                raise Exception("%s\n%s" % (csw.message, self.getUrl()))
            for document, ref in csw.getRecords().items():
                warnings += self.createNewMetadata(document, ref)
        elif transport == "HTTP":
            print(">2")
            http = HTTPTransport(self.getUrl())
            print(">2!")
            if http.message:
                print("Exit message")
                raise Exception("%s\n%s" % (http.message, self.getUrl()))
            print("Creating Metadata documents.")
            for document, ref in http.files.items():
                warnings += self.createNewMetadata(document, ref)

        if warnings:
            return warnings
        return -1
    except:
        # Deliberately a catch-all: whatever goes wrong is logged and
        # reported through the return value instead of propagating.
        trace = traceback.format_exc()
        print(trace)
        self.aq_parent.addLog("Error during harvesting from : " + self.getUrl(), trace, type="Error")
        return 1
def harvest(self):
    """
    @summary: does the harvesting for the given type and url

    Asks the transport selected by getTransport() ("FTP", "CSW" or "HTTP")
    for its documents and passes each (key, value) pair of the returned
    mapping to createNewMetadata(), summing the numbers it returns.
    For FTP only entries whose value ends in "xml" are imported.

    @return: the summed warning count when it is non-zero;
             -1 when it is zero (also the result for an unrecognised
             transport, which imports nothing);
             1 when an exception aborted the harvest - the traceback is
             printed and written to the parent's log.
             NOTE: a result of 1 is therefore ambiguous (one warning or
             a failure); callers have to consult the log to tell.
    """
    try:
        warnings = 0
        self.lastUpdateDate = time.time()
        transport = self.getTransport()

        if transport == "FTP":
            ftp = FTPTransport(self.getUrl(), self.getUsername(), self.getPassword())
            if ftp.message:
                # A real exception object: raising a plain string is a
                # TypeError on Python >= 2.6 and would hide the message.
                raise Exception("%s\n%s" % (ftp.message, self.getUrl()))
            # NOTE: zip archives are not unpacked; only plain xml entries
            # are imported.
            for document, ref in ftp.getFiles().items():
                if ref[-3:] == "xml":
                    warnings += self.createNewMetadata(document, ref)
        elif transport == "CSW":
            csw = CSWTransport(self.getUrl(), "")
            if csw.message:
                raise Exception("%s\n%s" % (csw.message, self.getUrl()))
            for document, ref in csw.getRecords().items():
                warnings += self.createNewMetadata(document, ref)
        elif transport == "HTTP":
            print(">2")
            http = HTTPTransport(self.getUrl())
            print(">2!")
            if http.message:
                print("Exit message")
                raise Exception("%s\n%s" % (http.message, self.getUrl()))
            print("Creating Metadata documents.")
            for document, ref in http.files.items():
                warnings += self.createNewMetadata(document, ref)

        if warnings:
            return warnings
        return -1
    except:
        # Deliberately a catch-all: whatever goes wrong is logged and
        # reported through the return value instead of propagating.
        trace = traceback.format_exc()
        print(trace)
        self.aq_parent.addLog("Error during harvesting from : " + self.getUrl(), trace, type="Error")
        return 1
def harvest(self):
    """
    @summary: does the harvesting for the given type and url

    Fetches the document mapping from the transport selected by
    getTransport() ("FTP", "CSW" or "HTTP") and passes every
    (key, value) pair to createNewMetadata(), which is expected to
    return a (warnings, url) pair.  After every 10th document the
    transaction is committed and a progress mail is sent; a summary mail
    is sent once all documents are processed.

    @return: None when the harvest ran to completion; 1 when an exception
             aborted it - the traceback is printed and written to the
             parent's log.
    """
    try:
        harvestStart = time.strftime("%H:%M %y-%m-%d")
        print("Harvesting %s" % (self.getUrl(),))
        mTo = self.getEmail()
        mFrom = "*****@*****.**"
        mSubjTemplate = 'Harvester "%s" progress report (from %s to %s) '
        obj_url = self.absolute_url()
        # One header per line; the blank line ends the header block.
        messageTemplate = (
            "From: CoGIS Portal <%s>\n"
            "Subject: %s\n"
            "To: <%s>\n"
            "Content-Type: text/plain;\n\n"
        )
        self.lastUpdateDate = time.time()

        # Real exception objects are raised below: raising a plain string
        # is a TypeError on Python >= 2.6 and would hide the message.
        transport = self.getTransport()
        if transport == "FTP":
            ftp = FTPTransport(self.getUrl(), self.getUsername(), self.getPassword())
            if ftp.message:
                raise Exception("%s\n%s" % (ftp.message, self.getUrl()))
            files = ftp.getFiles()
        elif transport == "CSW":
            csw = CSWTransport(self.getUrl(), "")
            if csw.message:
                raise Exception("%s\n%s" % (csw.message, self.getUrl()))
            files = csw.getRecords()
        elif transport == "HTTP":
            http = HTTPTransport(self.getUrl())
            if http.message:
                raise Exception("%s\n%s" % (http.message, self.getUrl()))
            files = http.files
        else:
            # Fail with a readable message instead of a NameError on the
            # never-assigned ``files``.
            raise Exception("Unsupported transport %r\n%s" % (transport, self.getUrl()))

        total = len(files)
        count = 0
        warnings = []    # every warning of the whole run
        reported = 0     # how many of them earlier progress mails covered
        processed = []   # urls handled since the last progress mail
        durations = []   # seconds spent on each document so far

        for document, ref in files.items():
            count += 1
            start = time.time()
            newwarnings, url = self.createNewMetadata(document, ref)
            print("newwarnings %s" % (newwarnings,))
            warnings.extend(newwarnings)
            processed.append(url)
            elapsed = time.time() - start
            durations.append(elapsed)
            print(">> %s of %s and %s seconds. AVG: %s" % (count, total, elapsed, elapsed))

            if count % 10 == 0:
                # Mean over all documents handled so far.
                avg = sum(durations) / len(durations)
                fresh = warnings[reported:]
                reported = len(warnings)

                # count is a multiple of 10 here, so the batch that just
                # finished is documents count-9 .. count.
                mSubj = mSubjTemplate % (self.title_or_id(), count - 9, count)
                mSubj += "of %s documents." % total
                message = messageTemplate % (mFrom, mSubj, mTo)
                if fresh:
                    message += (
                        "The following errors occurred, please se the Harvester logs for further details: \n"
                        + str(fresh)
                    )
                else:
                    message += "The Harvest didn't run into any problems yet.: %s \n" % obj_url
                message += "Processed the following urls:\n%s\n" % "\n\t".join(processed)
                message += "\nThe average processing time was: %s\n" % (avg,)
                processed = []
                transaction.commit()
                self.sendemail(mSubj, obj_url, message)

        harvestEnd = time.strftime("%H:%M %y-%m-%d")
        mSubj = mSubjTemplate % (self.title_or_id(), harvestStart, harvestEnd)
        message = messageTemplate % (mFrom, mSubj, mTo)
        if warnings:
            # The summary covers the whole run, so it cannot claim success
            # after an earlier batch reported errors.
            message += "The following errors occurred, please se the Harvester logs for further details: \n" + str(
                "\n".join(warnings)
            )
        else:
            message += "The Harvest was successful: %s \n" % obj_url
        self.sendemail(mSubj, obj_url, message)
    except:
        # Deliberately a catch-all: whatever goes wrong is logged and
        # reported through the return value instead of propagating.
        trace = traceback.format_exc()
        print(trace)
        self.aq_parent.addLog("Error during harvesting from : " + self.getUrl(), trace, type="Error")
        return 1
def harvest(self):
    """
    @summary: does the harvesting for the given type and url

    Fetches the document mapping from the transport selected by
    getTransport() ("FTP", "CSW" or "HTTP") and passes every
    (key, value) pair to createNewMetadata(), which is expected to
    return a (warnings, url) pair.  After every 10th document the
    transaction is committed and a progress mail is sent; a summary mail
    is sent once all documents are processed.

    @return: None when the harvest ran to completion; 1 when an exception
             aborted it - the traceback is printed and written to the
             parent's log.
    """
    try:
        harvestStart = time.strftime("%H:%M %y-%m-%d")
        print("Harvesting %s" % (self.getUrl(),))
        mTo = self.getEmail()
        mFrom = '*****@*****.**'
        mSubjTemplate = 'Harvester "%s" progress report (from %s to %s) '
        obj_url = self.absolute_url()
        # One header per line; the blank line ends the header block.
        messageTemplate = (
            "From: CoGIS Portal <%s>\n"
            "Subject: %s\n"
            "To: <%s>\n"
            "Content-Type: text/plain;\n\n"
        )
        self.lastUpdateDate = time.time()

        # Real exception objects are raised below: raising a plain string
        # is a TypeError on Python >= 2.6 and would hide the message.
        transport = self.getTransport()
        if transport == "FTP":
            ftp = FTPTransport(self.getUrl(), self.getUsername(), self.getPassword())
            if ftp.message:
                raise Exception('%s\n%s' % (ftp.message, self.getUrl()))
            files = ftp.getFiles()
        elif transport == "CSW":
            csw = CSWTransport(self.getUrl(), "")
            if csw.message:
                raise Exception('%s\n%s' % (csw.message, self.getUrl()))
            files = csw.getRecords()
        elif transport == "HTTP":
            http = HTTPTransport(self.getUrl())
            if http.message:
                raise Exception('%s\n%s' % (http.message, self.getUrl()))
            files = http.files
        else:
            # Fail with a readable message instead of a NameError on the
            # never-assigned ``files``.
            raise Exception("Unsupported transport %r\n%s" % (transport, self.getUrl()))

        total = len(files)
        count = 0
        warnings = []    # every warning of the whole run
        reported = 0     # how many of them earlier progress mails covered
        processed = []   # urls handled since the last progress mail
        durations = []   # seconds spent on each document so far

        for document, ref in files.items():
            count += 1
            start = time.time()
            newwarnings, url = self.createNewMetadata(document, ref)
            print('newwarnings %s' % (newwarnings,))
            warnings.extend(newwarnings)
            processed.append(url)
            elapsed = time.time() - start
            durations.append(elapsed)
            print('>> %s of %s and %s seconds. AVG: %s' % (count, total, elapsed, elapsed))

            if count % 10 == 0:
                # Mean over all documents handled so far.
                avg = sum(durations) / len(durations)
                fresh = warnings[reported:]
                reported = len(warnings)

                # count is a multiple of 10 here, so the batch that just
                # finished is documents count-9 .. count.
                mSubj = mSubjTemplate % (self.title_or_id(), count - 9, count)
                mSubj += 'of %s documents.' % total
                message = messageTemplate % (mFrom, mSubj, mTo)
                if fresh:
                    message += (
                        "The following errors occurred, please se the Harvester logs for further details: \n"
                        + str(fresh)
                    )
                else:
                    message += "The Harvest didn't run into any problems yet.: %s \n" % obj_url
                message += "Processed the following urls:\n%s\n" % '\n\t'.join(processed)
                message += "\nThe average processing time was: %s\n" % (avg,)
                processed = []
                transaction.commit()
                self.sendemail(mSubj, obj_url, message)

        harvestEnd = time.strftime("%H:%M %y-%m-%d")
        mSubj = mSubjTemplate % (self.title_or_id(), harvestStart, harvestEnd)
        message = messageTemplate % (mFrom, mSubj, mTo)
        if warnings:
            # The summary covers the whole run, so it cannot claim success
            # after an earlier batch reported errors.
            message += "The following errors occurred, please se the Harvester logs for further details: \n" + str(
                '\n'.join(warnings)
            )
        else:
            message += "The Harvest was successful: %s \n" % obj_url
        self.sendemail(mSubj, obj_url, message)
    except:
        # Deliberately a catch-all: whatever goes wrong is logged and
        # reported through the return value instead of propagating.
        trace = traceback.format_exc()
        print(trace)
        self.aq_parent.addLog("Error during harvesting from : " + self.getUrl(), trace, type="Error")
        return 1