def s3BuildOps(conf):
    """
    Compare a local folder with what is already stored under an S3 key
    prefix and build one S3Operation per file found on either side.

    :param conf: dict-like configuration. The keys read here are:
                 'bucket'    - bucket name, handed to Transfer and used in log output
                 'keyprefix' - key prefix inside the bucket
                 'direction' - compared against S3Operation.Direction.UP to
                               decide which side is logged as FROM and which as TO
                 'localroot' - local folder to compare against
                 The whole conf object is also passed on to every S3Operation.
    :return: dict mapping each relative file name to its S3Operation
             (empty when nothing was found locally or remotely)
    """
    s3 = Transfer(conf['bucket'])
    opstore = {}
    log = Logger("s3BuildOps")

    # Normalise the prefix so it always ends in exactly one slash
    prefix = "{0}/".format(conf['keyprefix']).replace("//", "/")

    log.title('The following locations were found:')
    remotestr = 's3://{0}/{1}'.format(conf['bucket'], conf['keyprefix'])
    if conf['direction'] == S3Operation.Direction.UP:
        fromstr, tostr = conf['localroot'], remotestr
    else:
        fromstr, tostr = remotestr, conf['localroot']
    log.info('FROM: {0}'.format(fromstr))
    log.info('TO  : {0}'.format(tostr))

    log.title('The following operations are queued:')

    response = s3.list(prefix)

    # Get all the files we have locally
    # NOTE(review): localProductWalker is expected to fill `files` in place,
    # keyed by relative name -- confirm against its definition.
    files = {}
    if os.path.isdir(conf['localroot']):
        localProductWalker(conf['localroot'], files)

    # Fill in any files we find on the remote
    if 'Contents' in response:
        for result in response['Contents']:
            key = result['Key']
            # Strip the prefix only from the front of the key. A blanket
            # str.replace() would also delete later occurrences of the prefix
            # text (and with an empty keyprefix, where prefix is "/", it
            # would remove every slash in the key).
            dstkey = key[len(prefix):] if key.startswith(prefix) else key
            files.setdefault(dstkey, {})['dst'] = result

    for relname in files:
        opstore[relname] = S3Operation(relname, files[relname], conf)

    if not opstore:
        log.info("-- NO Operations Queued --")

    return opstore
class Project():
    """
    Wrapper around a project XML file that knows how to work out where the
    project belongs in the remote repository.
    """

    def __init__(self, projectRoot, projXMLFile):
        """
        :param projectRoot: local folder containing the project
        :param projXMLFile: file name of the project XML, relative to projectRoot
        """
        self.log = Logger('Project')
        self.DOM = None
        self.getProgramFromXML(path.join(projectRoot, projXMLFile))
        self.LocalRoot = projectRoot

    def getProgramFromXML(self, progXMLpath):
        """
        Parse the XML file at the given path and store its root element in self.DOM

        :param progXMLpath: path to the XML file to load
        :raises AssertionError: if the file does not exist
        """
        assert path.isfile(
            progXMLpath), "ERROR: could not find file called: {}".format(
                progXMLpath)
        self.DOM = ET.parse(progXMLpath).getroot()

    def getPath(self, program):
        """
        Figure out what the repository path should be

        :param program: object providing findprojpath(), testAllowedCollection()
                        and getAllowedLookup()
        :return: the remote path as a '/'-joined string with no leading slash
        :raises AssertionError: if <ProjectType> is missing/empty or the
                                product cannot be found in the program
        :raises ValueError: if a collection needed for the path has no
                            matching <Meta> tag (see getcollection)
        """
        self.log.title('Getting remote path...')

        # First let's get the project type. A missing or empty tag must reach
        # the assertion below instead of dying early with an AttributeError.
        typeNode = self.DOM.find('./ProjectType')
        if typeNode is not None and typeNode.text:
            projType = typeNode.text.strip()
        else:
            projType = ''
        assert not _strnullorempty(
            projType), "ERROR: <ProjectType> not found in project XML."
        self.log.info("Project Type Detected: {0}".format(projType))

        # Now go get the product node from the program XML
        patharr = program.findprojpath(projType)
        assert patharr is not None, "ERROR: Product '{0}' not found anywhere in the program XML".format(
            projType)
        self.log.title("Building Path to Product: {0}".format(projType))

        parts = []
        for idx, level in enumerate(patharr):
            indent = idx * ' '
            if level['type'] == 'collection':
                col = self.getcollection(level['name'])
                self.log.info("{0}/collection:{1} => {2}".format(
                    indent, level['name'], col))
                name = col
                # Allowed collections may map the value found in the project
                # onto a different folder name
                if program.testAllowedCollection(level['id'], col):
                    name = program.getAllowedLookup(level['id'], col)
                parts.append(name)
            elif level['type'] == 'group':
                self.log.info("{0}/group:{1}".format(indent, level['name']))
                parts.append(level['folder'])
            elif level['type'] == 'product':
                self.log.info("{0}/product:{1}".format(indent, level['name']))
                parts.append(level['folder'])

        # Joining without a leading slash keeps this consistent elsewhere
        extpath = '/'.join(parts)
        self.log.info("Final remote path to product: {0}".format(extpath))
        return extpath

    def getcollection(self, colname):
        """
        Try to pull the Collection out of the project file

        :param colname: string with the Collection we're looking for
        :return: the stripped text of the matching <Meta> tag
        :raises ValueError: if the tag is missing or has no text
        """
        try:
            val = self.DOM.find(
                "MetaData/Meta[@name='{0}']".format(colname)).text.strip()
        except AttributeError:
            # find() returned None, or the tag exists but is empty
            raise ValueError(
                "ERROR: Could not find <Meta name='{0}'>########</Meta> tag in project XML"
                .format(colname))
        return val
class Program():
    """
    In-memory representation of a program XML document: its collections,
    groups and products plus the hierarchy that ties them together.
    """

    def __init__(self, programpath):
        """
        :param programpath: local file path or http(s) URL of the program XML
        """
        # The logger has to exist before getProgram() runs because its
        # download-failure branch reports through self.log
        self.log = Logger('Program')
        self.DOM = None
        self.getProgram(programpath)

        self.Collections = {}
        self.Groups = {}
        self.Products = {}
        self.Hierarchy = {}
        self.Bucket = None

        # Populate everything
        self.getBucket()
        self.getProjectFile()
        self.parseCollections()
        self.parseGroups()
        self.parseProducts()
        self.parseTree(self.DOM.find('Hierarchy/*'))

    def parseCollections(self):
        """
        Pull all the collections out of the program XML into self.Collections,
        keyed by collection id

        :return:
        """
        for col in self.DOM.findall('Definitions/Collections/Collection'):
            colId = col.attrib['id']
            allows = self.parseCollectionAllowed(col.findall('Allow'))
            self.Collections[colId] = {
                'id': colId,
                'type': 'collection',
                'name': col.attrib['name'],
                'allows': allows,
                # The first <Allow> decides the type; no allows means 'fixed'
                'allowtype': allows[0]['type'] if allows else 'fixed'
            }

    def getProgram(self, progpath):
        """
        Either uses a local path or downloads an online version of the program
        XML, and stores the root element in self.DOM

        :param progpath: local file path or http(s) URL
        :raises ValueError: if the URL could not be downloaded or parsed
        """
        if re.match(r'^https?://', progpath) is not None:
            # Imported here so the method works on Python 2 and Python 3
            try:
                import urllib2 as urlrequest
            except ImportError:
                import urllib.request as urlrequest
            try:
                request = urlrequest.Request(progpath)
                request.add_header('Pragma', 'no-cache')
                response = urlrequest.build_opener().open(request)
                try:
                    data = response.read()
                finally:
                    response.close()
                self.DOM = ET.fromstring(data)
            except Exception:
                err = "ERROR: Could not download <{0}>".format(progpath)
                self.log.error(err)
                raise ValueError(err)
        else:
            self.DOM = ET.parse(progpath).getroot()

    def parseCollectionAllowed(self, allowETs):
        """
        Turn a list of <Allow> elements into a list of plain dicts

        :param allowETs: iterable of elements exposing an `attrib` mapping
        :return: list of dicts; an element with a 'pattern' attribute becomes
                 {'type': 'pattern', 'pattern': ...}, any other becomes a
                 copy of its attributes plus 'type': 'fixed'
        """
        allows = []
        for allow in allowETs:
            if 'pattern' in allow.attrib:
                allows.append({
                    'type': 'pattern',
                    'pattern': allow.attrib['pattern'],
                })
            else:
                # Work on a copy so the XML element itself is left untouched
                attrs = dict(allow.attrib)
                attrs['type'] = 'fixed'
                allows.append(attrs)
        return allows

    def testAllowedCollection(self, colName, desiredName):
        """
        Test if this is a valid collection value to ask for

        :param colName: key into self.Collections
        :param desiredName: the value we want to use for that collection
        :return: True when the value is allowed (or the collection has no
                 <Allow> restrictions at all)
        :raises AssertionError: if desiredName is empty or not allowed
        """
        collection = self.Collections[colName]

        if len(collection['allows']) == 0:
            return True

        assert len(
            desiredName
        ) > 0, "ERROR: Desired collection name for collection {0} is empty.".format(
            collection['name'])

        bGood = False
        for allow in collection['allows']:
            if allow['type'] == 'pattern':
                try:
                    if re.match(allow['pattern'], desiredName):
                        bGood = True
                except re.error as e:
                    self.log.error(
                        "Something went wrong with the allow RegEx in the Program XML file",
                        e)
            elif allow['name'] == desiredName:
                bGood = True
            elif 'aliases' in allow and desiredName in allow[
                    'aliases'].split(','):
                bGood = True
            # One match is enough, no need to look at the remaining allows
            if bGood:
                break

        assert bGood, "ERROR: Desired Collection: {0} did not pass the allowed values test for collection: {1}".format(
            desiredName, collection['name'])
        return bGood

    def getAllowedLookup(self, colName, desiredName):
        """
        Get the actual allowed name. Most of the time this is just what you
        pass in but in the case of non-pattern allows this will do a lookup

        :param colName: key into self.Collections
        :param desiredName: the value found for that collection
        :return: the 'folder' of the fixed allow whose name matches, otherwise
                 desiredName unchanged
        """
        # NOTE(review): aliases pass testAllowedCollection but are not mapped
        # to the folder here -- confirm whether that is intentional.
        name = desiredName
        for allow in self.Collections[colName]['allows']:
            if allow['type'] == 'fixed' and allow['name'] == desiredName:
                name = allow['folder']
        return name

    def parseGroups(self):
        """
        Pull all the groups out of the program XML into self.Groups, keyed by id
        """
        for grp in self.DOM.findall('Definitions/Groups/Group'):
            self.Groups[grp.attrib['id']] = {
                'id': grp.attrib['id'],
                'type': 'group',
                'name': grp.attrib['name'],
                'folder': grp.attrib['folder']
            }

    def parseProducts(self):
        """
        Pull all the products out of the program XML into self.Products, keyed by id
        """
        for prod in self.DOM.findall('Definitions/Products/Product'):
            self.Products[prod.attrib['id']] = {
                'id': prod.attrib['id'],
                'type': 'product',
                'name': prod.attrib['name'],
                'folder': prod.attrib['folder']
            }

    def parseTree(self, etNode, treeNode=None):
        """
        Recursively convert a hierarchy element into nested dicts of the form
        {'type': ..., 'node': ..., 'children': [...]}; products have no
        'children'. Elements that are neither a Group, a Collection nor a
        Product with a 'ref' attribute come back as an empty dict.

        :param etNode: the XML element to convert
        :param treeNode: None for the root call (the result is then stored in
                         self.Hierarchy); recursive calls pass the parent's
                         children list
        :return: the dict built for etNode
        """
        obj = {}
        if etNode.tag == 'Product' and 'ref' in etNode.attrib:
            obj['type'] = 'product'
            obj['node'] = self.Products[etNode.attrib['ref']]
        elif etNode.tag in ['Group', 'Collection']:
            obj['children'] = []
            if etNode.tag == 'Group':
                obj['type'] = 'group'
                obj['node'] = self.Groups[etNode.attrib['ref']]
            else:
                obj['type'] = 'collection'
                obj['node'] = self.Collections[etNode.attrib['ref']]

            # Iterate the element directly: Element.getchildren() no longer
            # exists in current Python versions
            for child in etNode:
                obj['children'].append(self.parseTree(child, obj['children']))

        if treeNode is None:
            self.Hierarchy = obj

        return obj

    def getProjectFile(self):
        """
        Read the 'projectfile' <Meta> tag into self.ProjectFile

        :raises ValueError: if the tag is missing or empty
        """
        try:
            self.ProjectFile = self.DOM.find(
                "MetaData/Meta[@name='projectfile']").text.strip()
        except AttributeError:
            # find() returned None, or the tag has no text
            msg = "ERROR: No <Meta Name='projectfile'>project.rs.xml</Meta> tag found in program XML"
            self.log.error(msg)
            raise ValueError(msg)
        self.log.info("Project File we're looking for: {0}".format(
            self.ProjectFile))

    def getBucket(self):
        """
        Read the 's3bucket' <Meta> tag into self.Bucket

        :raises ValueError: if the tag is missing or empty
        """
        try:
            self.Bucket = self.DOM.find(
                "MetaData/Meta[@name='s3bucket']").text.strip()
        except AttributeError:
            # find() returned None, or the tag has no text
            msg = "ERROR: No <Meta Name='s3bucket'>riverscapes</Meta> tag found in program XML"
            self.log.error(msg)
            raise ValueError(msg)
        self.log.info("S3 Bucket Detected: {0}".format(self.Bucket))

    def getProdPath(self, prodName):
        """
        Look up the hierarchy path leading to a product

        :param prodName: product name to look for
        :return: list of node dicts from the hierarchy root down to the product
        :raises AssertionError: if prodName is empty or cannot be found
        """
        self.log.title('Getting remote path structure...')

        # First let's get the project type
        assert not _strnullorempty(
            prodName), "ERROR: <ProjectType> not found in project XML."
        self.log.info("Project Type Detected: {0}".format(prodName))

        # Now go get the product node from the program XML
        patharr = self.findprojpath(prodName)
        assert patharr is not None, "ERROR: Product '{0}' not found anywhere in the program XML".format(
            prodName)

        self.log.title("Building Path to Product: {0}".format(prodName))
        return patharr

    def findprojpath(self, prodname, node=None, path=None):
        """
        Find the path to the desired project

        :param prodname: product name to search for
        :param node: hierarchy node to search from (defaults to self.Hierarchy)
        :param path: node dicts already walked to reach `node`; it is never
                     modified
        :return: list of node dicts ending with the matching product, or None
                 when nothing below `node` matches
        """
        if node is None:
            node = self.Hierarchy
        if path is None:
            path = []

        # .get() lets empty placeholder nodes be skipped instead of
        # aborting the whole search
        nodeType = node.get('type')
        if nodeType == 'product' and node['node']['name'] == prodname:
            # Build a new list so neither the caller's list nor a shared
            # default can accumulate entries between calls
            return path + [node['node']]
        elif nodeType in ['group', 'collection']:
            newpath = path + [node['node']]
            for child in node['children']:
                result = self.findprojpath(prodname, child, newpath)
                if result is not None:
                    return result
        return None

    def progtos3path(self, progpath, level=0, currpath=None, paths=None):
        """
        A program path to a series of real S3 paths

        Every level of progpath contributes one path segment. A level holding
        several choices fans out, so the result has one path per combination.

        NOTE(review): the shape of progpath is assumed here to be a sequence
        whose items are either one segment string or a list/tuple of segment
        strings -- confirm against the callers.

        :param progpath: sequence of levels as described above
        :param level: index of the level being expanded (internal)
        :param currpath: segments chosen so far (internal)
        :param paths: list collecting the finished paths (internal)
        :return: list of '/'-joined path strings
        """
        if currpath is None:
            currpath = []
        if paths is None:
            paths = []

        # Are we at the end yet?
        if level >= len(progpath):
            # An empty progpath yields no path at all rather than ['']
            if currpath:
                paths.append('/'.join(currpath))
            return paths

        choices = progpath[level]
        if not isinstance(choices, (list, tuple)):
            # One choice. Just move on
            choices = [choices]

        for el in choices:
            # Each branch extends its own copy of the path so far
            self.progtos3path(progpath, level + 1, currpath + [el], paths)
        return paths