Example #1
    def connect(self, args):
        """Connect to the server"""
        self.path = os.path.abspath(args['path'])
        self.loadconfig(self.path)

        if args['credentials'] is not None:
            cred = args['credentials']
        elif 'user' in self.config and 'pass' in self.config:
            cred = self.config['user'] + ":" + self.config['pass']
        else:
            cred = None

        if cred is None:
            raise RuntimeError("No credentials specified and none found "
                               "in the configuration")

        try:
            # split on the first colon only, so passwords may contain ":"
            adminuser, adminpass = cred.split(":", 1)
        except ValueError:
            raise RuntimeError("Invalid credentials (must be user:pass): {}" \
                                   .format(cred))

        if args['debug']:
            logging.basicConfig(level=logging.WARNING)
            logging.getLogger("requests").setLevel(logging.INFO)
            logging.getLogger("marklogic").setLevel(logging.DEBUG)

        self.batchsize = args['batchsize']
        self.database = args['database']
        self.dryrun = args['dryrun']
        self.list = args['list']
        self.mirror = args['mirror']
        self.regex = args['regex']
        self.root = args['root']
        self.threshold = args['threshold']
        self.verbose = args['verbose']

        if self.list and self.regex:
            raise RuntimeError("You must not specify both --regex and --list")

        if self.root.endswith("/"):
            self.root = self.root[:-1]

        if args['hostname'] is None:
            if 'host' in self.config:
                self.hostname = self.config['host']
                if 'port' in self.config:
                    self.port = self.config['port']
                else:
                    self.port = 8000
                if 'management-port' in self.config:
                    self.management_port = self.config['management-port']
                else:
                    self.management_port = 8002
        else:
            parts = args['hostname'].split(":")
            self.hostname = parts.pop(0)
            self.management_port = 8002
            self.port = 8000
            if parts:
                self.management_port = int(parts.pop(0))
            if parts:
                self.port = int(parts.pop(0))

        self.connection = Connection(
            self.hostname, HTTPDigestAuth(adminuser, adminpass),
            port=self.port, management_port=self.management_port)

        self.utils = ClientUtils(self.connection)
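
    # Hypothetical usage sketch: the key names below are inferred from the
    # dict lookups in connect() above; the values are illustrative only.
    #
    #   client = MarkLogicDatabaseMirror()
    #   client.connect({'path': '/tmp/mirror', 'credentials': 'admin:admin',
    #                   'debug': False, 'batchsize': 100,
    #                   'database': 'Documents', 'dryrun': True, 'list': None,
    #                   'mirror': False, 'regex': [], 'root': '/',
    #                   'threshold': 10000000, 'hostname': 'localhost',
    #                   'verbose': True})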
Example #2
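
# Imports needed to make this example self-contained. The standard-library
# and requests imports follow directly from the names used below; the
# marklogic module paths and the three constants are assumptions.
import io
import json
import logging
import math
import os
import re
import sys
import uuid
import xml.etree.ElementTree as ET
from datetime import datetime

from requests.auth import HTTPDigestAuth
from requests_toolbelt.multipart.decoder import MultipartDecoder

# Module paths below are assumptions based on the class names:
from marklogic.connection import Connection
from marklogic.documents import Documents, BulkLoader
from marklogic.transactions import Transactions
from marklogic.clientutils import ClientUtils

BATCHSIZE = 100                   # assumed default batch size
BULKTHRESHOLD = 10 * 1000 * 1000  # assumed bulk threshold, in bytes
CONFIGFILE = "mldbmirror.cfg"     # assumed config file name
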
class MarkLogicDatabaseMirror:
    def __init__(self):
        self.batchsize = BATCHSIZE
        self.cdir = None
        self.config = None
        self.connection = None
        self.database = None
        self.dryrun = False
        self.hostname = None
        self.list = None
        self.mdir = None
        self.mirror = False
        self.path = None
        self.port = None
        self.management_port = None
        self.regex = []
        self.root = None
        self.threshold = BULKTHRESHOLD
        self.ucdir = None
        self.umdir = None
        self.utils = None
        self.verbose = False
        self.logger = logging.getLogger("marklogic.examples.mldbmirror")

    def connect(self, args):
        """Connect to the server"""
        self.path = os.path.abspath(args['path'])
        self.loadconfig(self.path)

        if args['credentials'] is not None:
            cred = args['credentials']
        elif 'user' in self.config and 'pass' in self.config:
            cred = self.config['user'] + ":" + self.config['pass']
        else:
            cred = None

        if cred is None:
            raise RuntimeError("No credentials specified and none found "
                               "in the configuration")

        try:
            # split on the first colon only, so passwords may contain ":"
            adminuser, adminpass = cred.split(":", 1)
        except ValueError:
            raise RuntimeError("Invalid credentials (must be user:pass): {}" \
                                   .format(cred))

        if args['debug']:
            logging.basicConfig(level=logging.WARNING)
            logging.getLogger("requests").setLevel(logging.INFO)
            logging.getLogger("marklogic").setLevel(logging.DEBUG)

        self.batchsize = args['batchsize']
        self.database = args['database']
        self.dryrun = args['dryrun']
        self.list = args['list']
        self.mirror = args['mirror']
        self.regex = args['regex']
        self.root = args['root']
        self.threshold = args['threshold']
        self.verbose = args['verbose']

        if self.list and self.regex:
            raise RuntimeError("You must not specify both --regex and --list")

        if self.root.endswith("/"):
            self.root = self.root[:-1]

        if args['hostname'] is None:
            if 'host' in self.config:
                self.hostname = self.config['host']
                if 'port' in self.config:
                    self.port = self.config['port']
                else:
                    self.port = 8000
                if 'management-port' in self.config:
                    self.management_port = self.config['management-port']
                else:
                    self.management_port = 8002
        else:
            parts = args['hostname'].split(":")
            self.hostname = parts.pop(0)
            self.management_port = 8002
            self.port = 8000
            if parts:
                self.management_port = int(parts.pop(0))
            if parts:
                self.port = int(parts.pop(0))

        self.connection = Connection(
            self.hostname, HTTPDigestAuth(adminuser, adminpass),
            port=self.port, management_port=self.management_port)

        self.utils = ClientUtils(self.connection)

    def upload(self):
        """Upload data"""
        trans = Transactions(self.connection)
        if not self.dryrun:
            trans.set_database(self.database)
            trans.set_timeLimit(trans.max_timeLimit())
            trans.create()

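        # Heuristic: treat the path as a mirror only if every top-level entry
        # is one of the four mirror subdirectories.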
        mirror = True
        for name in os.listdir(self.path):
            if name not in ['content', 'metadata', 'ucontent', 'umetadata']:
                mirror = False

        if self.mirror and not mirror:
            raise RuntimeError("Path doesn't point to a mirror directory")

        try:
            if mirror:
                self._upload_mirror(trans)
            else:
                self._upload_directory(trans)
        except KeyboardInterrupt:
            if not self.dryrun:
                trans.rollback()
        except:
            if not self.dryrun:
                trans.rollback()
            raise
        else:
            if not self.dryrun:
                trans.commit()

    def _upload_mirror(self, trans):
        """Internal method for uploading a mirror."""

        upload_map = {}

        # Before we start, make sure the mirror isn't corrupted.
        # We check that every content file has a corresponding metadata file.
        # We don't check the other way around; if you delete some content, you
        # don't have to delete the corresponding metadata.
        print("Reading files from filesystem...")

        cpath = "{}/content".format(self.path)
        mpath = "{}/metadata".format(self.path)
        missing = []
        if os.path.exists(cpath):
            files = self.scan(cpath, root=cpath)
            for check in files:
                if os.path.exists(mpath + check):
                    upload_map[check] = {"content": cpath + check, \
                                             "metadata": mpath + check}
                else:
                    missing.append(mpath + check)

        cpath = "{}/ucontent".format(self.path)
        mpath = "{}/umetadata".format(self.path)
        if os.path.exists(cpath):
            files = self.scan(cpath, root=cpath)
            for check in files:
                if os.path.exists(mpath + check):
                    upload_map[check] = {"content": cpath + check, \
                                             "metadata": mpath + check, \
                                             "uuid": True}
                else:
                    missing.append(mpath + check)

        if missing:
            print("Missing files:", missing)
            raise RuntimeError("Mirror corrupt")

        self._upload_map(trans, upload_map)

    def _upload_directory(self, trans):
        """Internal method for uploading a directory."""

        print("Reading files from filesystem...")
        allfiles = self.scan(self.path)

        if self.regex:
            files = self.regex_filter(allfiles)
        elif self.list:
            files = self.list_filter(allfiles)
        else:
            files = allfiles

        if len(files) != len(allfiles):
            print("Selected {} of {} files from filesystem..."\
                      .format(len(files), len(allfiles)))

        upload_map = {}
        for check in files:
            upload_map[check] = {"content": self.path + check}

        self._upload_map(trans, upload_map)

    def _upload_map(self, trans, upload_map):
        """Upload from an internally constructed map."""
        print("Reading URIs from server...")
        uris = self.utils.uris(self.database)
        urihash = {}
        for uri in uris:
            urihash[uri] = 1

        print("Getting timestamps from server...")
        stamps = self.get_timestamps(list(upload_map))
        if not stamps:
            print("No timestamps, assuming all files newer.")
        else:
            uptodate = []
            for key in upload_map:
                if key in stamps:
                    source = upload_map[key]['content']
                    statinfo = os.stat(source)
                    stamp = self._convert_timestamp(stamps[key])
                    if statinfo.st_mtime < stamp.timestamp():
                        uptodate.append(key)

            if uptodate:
                print("{} documents are up-to-date...".format(len(uptodate)))
                for key in uptodate:
                    del upload_map[key]
                    del urihash[key]  # also remove it from the deletion list

        upload_count = len(upload_map)
        print("Uploading {} files...".format(upload_count))

        docs = Documents(self.connection)

        bulk = BulkLoader(self.connection)
        bulk.set_database(self.database)
        bulk.set_txid(trans.txid())

        upload_size = 0
        ulcount = 0
        for doc in list(upload_map):

            docs.clear()

            source = upload_map[doc]['content']
            target = self.root + doc

            body_content_type = "application/octet-stream"

            if 'metadata' in upload_map[doc]:
                metasource = upload_map[doc]['metadata']
                metaxml = ET.parse(metasource)
                root = metaxml.getroot()

                txml = root \
                  .find("{http://marklogic.com/ns/mldbmirror/}content-type")
                if txml is not None:
                    body_content_type = txml.text
                    root.remove(txml)

                if 'uuid' in upload_map[doc] and upload_map[doc]['uuid']:
                    txml = root \
                      .find("{http://marklogic.com/ns/mldbmirror/}uri")
                    if txml is None:
                        raise RuntimeError("No URI provided in metadata.")
                    else:
                        target = txml.text
                        root.remove(txml)

                text = ET.tostring(root, encoding="unicode", method="xml")
                docs.set_metadata(text, "application/xml")
            else:
                metasource = None
                collections = []
                permissions = []

                for cfg in self.config["config"]:
                    if type(cfg["match"]) is list:
                        matches = cfg["match"]
                    else:
                        matches = [cfg["match"]]

                    for match in matches:
                        if re.match(match, target):
                            if "content-type" in cfg:
                                body_content_type = cfg["content-type"]
                            if "permissions" in cfg:
                                permissions = cfg["permissions"]
                            if "permissions+" in cfg:
                                permissions = permissions + cfg["permissions+"]
                            if "collections" in cfg:
                                collections = cfg["collections"]
                            if "collections+" in cfg:
                                collections = collections + cfg["collections+"]

                docs.set_collections(collections)
                docs.set_permissions(None)
                for perm in permissions:
                    for key in perm:
                        docs.add_permission(key, perm[key])

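            # urihash started as every URI in the database; whatever is left
            # in it after this loop becomes a deletion candidate below.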
            if target in urihash:
                del urihash[target]

            ulcount += 1
            statinfo = os.stat(source)
            upload_size += statinfo.st_size

            docs.set_uri(target)

            with open(source, "rb") as datafile:
                docs.set_content(datafile.read(), body_content_type)

            bulk.add(docs)

            if self.verbose:
                print("-> {}".format(target))

            if upload_size > self.threshold:
                perc = (float(ulcount) / upload_count) * 100.0
                print("{0:.0f}% ... {1} files, {2} bytes" \
                          .format(perc, bulk.size(), upload_size))
                if self.dryrun:
                    bulk.clear_content()
                else:
                    bulk.post()
                upload_size = 0

        if bulk.size() > 0:
            perc = (float(ulcount) / upload_count) * 100.0
            print("{0:.0f}% ... {1} files, {2} bytes" \
                      .format(perc, bulk.size(), upload_size))
            if self.dryrun:
                bulk.clear_content()
            else:
                bulk.post()

        docs.clear()
        docs.set_txid(trans.txid())
        docs.set_database(self.database)
        delcount = 0
        for uri in urihash:
            if uri.startswith(self.root):
                if self.verbose:
                    print("DEL {}".format(uri))
                docs.add_uri(uri)
                delcount += 1

        if delcount > 0:
            if self.regex or self.list:
                print("Limited download, not deleting {} files..." \
                          .format(delcount))
            else:
                print("Deleting {} URIs...".format(delcount))
                if not self.dryrun:
                    docs.delete()

    def download(self):
        """Download data"""
        trans = Transactions(self.connection)
        if not self.dryrun:
            trans.set_database(self.database)
            trans.set_timeLimit(trans.max_timeLimit())
            trans.create()

        try:
            if self.mirror:
                self._download_mirror(trans)
            else:
                self._download_directory(trans)
        except KeyboardInterrupt:
            if not self.dryrun:
                trans.rollback()
        except:
            if not self.dryrun:
                trans.rollback()
            raise
        else:
            if not self.dryrun:
                trans.commit()

    def _download_mirror(self, trans):
        """Download mirror"""
        if not os.path.isdir(self.path):
            print("Target directory must exist: {}".format(self.path))
            sys.exit(1)

        if os.listdir(self.path):
            print("Target directory must be empty: {}".format(self.path))
            sys.exit(1)

        self.logger.debug("Starting download_mirror")

        print("Reading URIs from server...")
        alluris = self.utils.uris(self.database, self.root)

        if self.regex:
            uris = self.regex_filter(alluris, download=True)
        elif self.list:
            uris = self.list_filter(alluris, download=True)
        else:
            uris = alluris

        cdir = "{}/content".format(self.path)
        mdir = "{}/metadata".format(self.path)
        ucdir = "{}/ucontent".format(self.path)
        umdir = "{}/umetadata".format(self.path)
        down_map = {}

        for uri in uris:
            if self.can_store_on_filesystem(uri):
                down_map[uri] = {"content": cdir + uri, \
                                     "metadata": mdir + uri}
            else:
                pos = uri.rfind(".")
                if pos > 0:
                    ext = uri[pos:]
                else:
                    ext = ""

                # Put some path segments at the front so that we don't wind up
                # with a single directory containing a gazillion files
                fnuuid = str(uuid.uuid4())
                fnuuid = fnuuid[0:2] + "/" + fnuuid[2:4] + "/" + fnuuid[4:]
                filename = "/{}{}".format(fnuuid, ext)

                down_map[uri] = {"content": ucdir + filename, \
                                     "metadata": umdir + filename, \
                                     "uuid": True}

        self._download_map(trans, down_map)

    def _download_directory(self, trans):
        """Download directory"""
        if not os.path.isdir(self.path):
            print("Target directory must exist: {}".format(self.path))
            sys.exit(1)

        self.logger.debug("Starting download_directory")

        print("Reading URIs from server...")
        alluris = self.utils.uris(self.database, self.root)

        if self.regex:
            uris = self.regex_filter(alluris, download=True)
        elif self.list:
            uris = self.list_filter(alluris, download=True)
        else:
            uris = alluris

        print("Getting timestamps from server...")
        stamps = self.get_timestamps(alluris)

        down_map = {}
        skip_list = []
        for uri in uris:
            if not self.can_store_on_filesystem(uri):
                raise RuntimeError("Cannot save URI:", uri)

            localfile = self.path + uri
            skip = False
            if uri in stamps and os.path.exists(localfile):
                statinfo = os.stat(localfile)
                stamp = self._convert_timestamp(stamps[uri])
                skip = statinfo.st_mtime >= stamp.timestamp()

            if skip:
                skip_list.append(localfile)
            else:
                down_map[uri] = {"content": localfile}
                if uri in stamps:
                    down_map[uri]['timestamp'] = stamps[uri]

        if len(skip_list) > 0:
            print("Skipping {} locally up-to-date files".format(
                len(skip_list)))

        self._download_map(trans, down_map, skip_list)

    def _download_map(self, trans, down_map, skip_list=None):
        """Download from an internally constructed map."""
        # avoid a shared mutable default argument
        if skip_list is None:
            skip_list = []
        filehash = {}
        if not self.mirror:
            print("Reading files from filesystem...")
            for path in self.scan(self.path):
                filehash[path] = 1

        self.logger.debug("Downloading map")
        self.logger.debug(down_map)

        download_count = len(down_map)

        print("Downloading {} documents...".format(download_count))

        docs = Documents(self.connection)
        docs.set_database(self.database)
        docs.set_txid(trans.txid())
        docs.set_format('xml')
        docs.set_accept("multipart/mixed")
        if self.mirror:
            docs.set_categories(['content', 'metadata'])
        else:
            docs.set_category('content')

        dlprog = 0
        dlcount = 0
        for uri in down_map.keys():
            dlcount += 1
            docs.add_uri(uri)

            if uri in filehash:
                del filehash[uri]

            if dlcount >= self.batchsize:
                dlprog += dlcount
                perc = (float(dlprog) / download_count) * 100.0
                print("{0:.0f}% ... {1}/{2} files" \
                          .format(perc, dlprog, download_count))

                self._download_batch(docs, down_map)
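                # docs.clear() also wipes the database/txid/format settings,
                # so re-prime the Documents object for the next batch.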
                docs.clear()
                docs.set_database(self.database)
                docs.set_txid(trans.txid())
                docs.set_format('xml')
                docs.set_accept("multipart/mixed")
                if self.mirror:
                    docs.set_categories(['content', 'metadata'])
                else:
                    docs.set_category('content')
                dlcount = 0

        if dlcount > 0:
            dlprog += dlcount
            perc = (float(dlprog) / download_count) * 100.0
            print("{0:.0f}% ... {1}/{2} files" \
                      .format(perc, dlprog, download_count))
            self._download_batch(docs, down_map)

        delfiles = []
        for path in filehash.keys():
            localfile = self.path + path
            if localfile not in skip_list:
                delfiles.append(localfile)

        if not self.mirror and delfiles:
            if self.regex or self.list:
                print("Limited download, not deleting {} files..." \
                          .format(len(delfiles)))
            else:
                print("Deleting {} files...".format(len(delfiles)))
                if not self.dryrun:
                    for path in delfiles:
                        os.remove(path)
                    self._remove_empty_dirs(self.path)

    def _download_batch(self, docs, down_map):
        """Download a batch of files"""
        if self.dryrun:
            return

        self.logger.debug("Downloading batch")
        self.logger.debug(docs)

        resp = docs.get()
        decoder = MultipartDecoder.from_response(resp)

        self.logger.debug("Downloaded {} bytes in {} parts" \
                              .format(len(resp.text), len(decoder.parts)))

        meta_part = None
        content_part = None
        splitregex = re.compile(r';\s*')

        if not decoder.parts:
            raise RuntimeError("FAILED TO GET ANY PARTS!?")

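        # This loop assumes the server emits a document's metadata part
        # immediately before its content part, so a saved meta_part always
        # belongs to the next content part seen.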
        for mimepart in decoder.parts:
            disp = mimepart.headers[b'Content-Disposition'].decode('utf-8')
            if 'category=metadata' in disp:
                if meta_part is not None:
                    raise RuntimeError("More than one metadata part!?")
                meta_part = mimepart
            else:
                content_part = mimepart

                disp = content_part.headers[b'Content-Disposition'].decode(
                    'utf-8')
                dispositions = splitregex.split(disp)
                filename = None
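                # A disposition looks like: attachment; filename="/doc.xml";
                # the slice strips the 'filename="' prefix (10 characters)
                # and the trailing quote.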
                for disp in dispositions:
                    if disp.startswith("filename="):
                        filename = disp[10:-1]

                body_content_type = content_part.headers[
                    b'Content-Type'].decode('utf-8')

                if filename is None:
                    raise RuntimeError("Multipart without filename!?")

                #print("FN:",filename)

                last_modified = None
                stanza = down_map[filename]
                if meta_part is not None:
                    last_modified = self._store_metadata(meta_part, down_map[filename], \
                                                             body_content_type, filename)
                    if last_modified is not None:
                        stanza['timestamp'] = last_modified
                self._store_content(content_part, stanza)
                meta_part = None
                content_part = None

    def _store_metadata(self, meta_part, stanza, body_content_type, uri):
        # fromstring() doesn't return an xml.etree.ElementTree and
        # doesn't have the write() method that we need later
        fakeio = io.StringIO(meta_part.content.decode('utf-8'))
        metaxml = ET.parse(fakeio)
        root = metaxml.getroot()
        fakeio.close()

        last_mod = None
        properties = root.find(
            '{http://marklogic.com/xdmp/property}properties')
        if properties is not None:
            last_mod = properties.find(
                '{http://marklogic.com/xdmp/property}last-modified')
            if last_mod is not None:
                last_mod = last_mod.text

        txml = ET.Element("{http://marklogic.com/ns/mldbmirror/}content-type")
        txml.text = body_content_type
        root.insert(0, txml)

        metafn = stanza['metadata']
        if 'uuid' in stanza:
            txml = ET.Element("{http://marklogic.com/ns/mldbmirror/}uri")
            txml.text = uri
            root.insert(1, txml)

        if not self.dryrun:
            if not os.path.exists(os.path.dirname(metafn)):
                os.makedirs(os.path.dirname(metafn))

            metaxml.write(metafn)
        else:
            if self.verbose:
                print("Meta:", metafn)

        return last_mod

    def _store_content(self, content_part, stanza):
        contfn = stanza['content']

        #print(stanza)

        if 'timestamp' in stanza:
            stamp = self._convert_timestamp(stanza['timestamp'])
        else:
            stamp = None

        if not self.dryrun:
            if not os.path.exists(os.path.dirname(contfn)):
                os.makedirs(os.path.dirname(contfn))

            with open(contfn, 'wb') as dataf:
                dataf.write(content_part.content)
            if stamp is not None:
                os.utime(contfn, (stamp.timestamp(), stamp.timestamp()))
        else:
            if self.verbose:
                print("Data:", contfn)

    def can_store_on_filesystem(self, filename):
        """Returns true if the filename can be stored.

        Many MarkLogic URIs (e.g., /path/to/file.xml) can be stored on the
        filesystem. Many others (e.g., http://...) cannot. This method
        returns true iff the filename is a legitimate filesystem name.

        N.B. If a filename does not begin with "/", it cannot be stored on the
        filesystem because if it's ever uploaded, it'll get a leading /.
        """
        if (not filename.startswith("/")) or ("//" in filename) \
          or (":" in filename) or ('"' in filename) or ("'" in filename) \
          or ("\\" in filename):
            return False
        else:
            return True

    def list_filter(self, alluris, download=False):
        if self.list:
            uris = []
            print("Reading list from {}...".format(self.list))
            with open(self.list, "r") as listfile:
                for line in listfile:
                    line = line.rstrip()
                    if line in alluris:
                        if self.dryrun and self.verbose:
                            print("INCL:", line)
                        uris.append(line)
                    else:
                        print("Not available:", line)

            if download:
                cat = "URIs"
            else:
                cat = "files"

            print("File list reduced {} {} to {}." \
                      .format(len(alluris), cat, len(uris)))

            return uris
        else:
            return alluris

    def get_timestamps(self, uris):
        """Get the database timestamp for these URIs"""
        chunksize = 500
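        # ceiling division: how many chunks are needed to cover all URIs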
        chunkcount = math.floor((len(uris) + chunksize - 1) / chunksize)
        chunk = 0

        stamps = []
        for seg in self._chunks(uris, chunksize):
            chunk += 1
            if self.verbose:
                print("\t-> chunk {} of {} ...".format(chunk, chunkcount))
            stamps += self.utils.last_modified(self.database, seg)

        # FIXME: make the XQuery return a single object
        stamp_hash = {}
        for stamp in stamps:
            stamp_hash[stamp["uri"]] = stamp["dt"]
        return stamp_hash

    def _chunks(self, uris, n):
        """Chop a long list into a list of lists"""
        for index in range(0, len(uris), n):
            yield uris[index:index + n]

    def regex_filter(self, alluris, download=False):
        if self.regex:
            uris = []
            cregex = []
            for exp in self.regex:
                cregex.append(re.compile(exp))

            for uri in alluris:
                match = any(exp.match(uri) for exp in cregex)

                if match:
                    if self.dryrun and self.verbose:
                        print("INCL:", uri)
                    uris.append(uri)
                else:
                    if self.dryrun and self.verbose:
                        print("EXCL:", uri)

            if download:
                cat = "URIs"
            else:
                cat = "files"

            if len(self.regex) == 1:
                print("Regex filter reduced {} {} to {}." \
                          .format(len(alluris), cat, len(uris)))
            elif len(self.regex) > 1:
                print("{} regex filters reduced {} {} to {}." \
                          .format(len(self.regex), len(alluris), cat, len(uris)))

            return uris
        else:
            return alluris

    def _remove_empty_dirs(self, directory):
        """Remove empty directories recursively"""
        for name in os.listdir(directory):
            path = os.path.join(directory, name)
            if os.path.isdir(path):
                self._remove_empty_dirs(path)

        if not os.listdir(directory):
            os.rmdir(directory)

    def scan(self, directory, root=None, files=None):
        """Scan a directory recursively, returning all of the files"""
        if files is None:
            files = []
        if root is None:
            root = directory
        skip = "/" + CONFIGFILE

        for name in os.listdir(directory):
            path = os.path.join(directory, name)
            if os.path.isfile(path):
                if not path.endswith(skip):
                    path = path[len(root):]
                    files.append(path)
            else:
                self.scan(path, root, files)
        return files

    def loadconfig(self, path):
        """Load the configuration file that determines document properties"""
        config = {}
        localconfig = {}

        home = os.path.expanduser("~")
        cfgfile = home + "/" + CONFIGFILE
        if os.path.isfile(cfgfile):
            with open(cfgfile) as cfg:
                config = json.load(cfg)

        cfgfile = path + "/" + CONFIGFILE
        if os.path.isfile(cfgfile):
            with open(cfgfile) as cfg:
                localconfig = json.load(cfg)

        for key in localconfig:
            if key != 'config':
                config[key] = localconfig[key]

        if 'config' not in config:
            config['config'] = []

        if 'config' in localconfig:
            config['config'] = config["config"] + localconfig['config']

        self.config = config

    def _convert_timestamp(self, stamp):
        """Convert an ISO 8601 dateTime string to a datetime"""
        if stamp.endswith("Z"):
            stamp = stamp[:-1] + "+0000"
        else:
            # Convert ...+05:00 to ...+0500 so %z can parse it
            stamp = stamp[:-3] + stamp[-2:]
        stamp = datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S%z")
        return stamp