Example #1
def setup_ssl():
    config.flask.ssl_context = None
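    # NOTE: ssl_context IS FORCED TO None HERE, SO THE GUARD BELOW RETURNS
    # IMMEDIATELY AND THE REST OF THIS FUNCTION IS EFFECTIVELY DISABLED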

    if not config.flask.ssl_context:
        return

    ssl_flask = config.flask.copy()
    ssl_flask.debug = False
    ssl_flask.port = 443

    if isinstance(config.flask.ssl_context, Mapping):
        # EXPECTED PEM ENCODED FILE NAMES
        # `load_cert_chain` REQUIRES CONCATENATED LIST OF CERTS
        tempfile = NamedTemporaryFile(delete=False, suffix=".pem")
        try:
            tempfile.write(
                File(ssl_flask.ssl_context.certificate_file).read_bytes())
            if ssl_flask.ssl_context.certificate_chain_file:
                tempfile.write(
                    File(ssl_flask.ssl_context.certificate_chain_file).
                    read_bytes())
            tempfile.flush()
            tempfile.close()

            context = SSLContext(PROTOCOL_SSLv23)
            context.load_cert_chain(
                tempfile.name,
                keyfile=File(ssl_flask.ssl_context.privatekey_file).abspath)

            ssl_flask.ssl_context = context
        except Exception, e:
            Log.error("Could not handle ssl context construction", cause=e)
        finally:
            # BODY TRUNCATED IN THE ORIGINAL LISTING; PRESUMABLY THE
            # TEMPORARY CONCATENATED PEM FILE IS REMOVED HERE
            File(tempfile.name).delete()
Example #2
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error("Can not clone {{repos.url}}, because {{problem}}", {
                            "repos": repo,
                            "problem": line
                        })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()


    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error("Problem pulling repos, try \"hg recover\"\n{{reason|indent}}", {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})
Example #3
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")
Example #4
def _upgrade():
    global _upgraded
    _upgraded = True
    try:
        import sys

        sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
        if python_dll.read_bytes() != sqlite_dll.read_bytes():
            backup = sqlite_dll.backup()
            File.copy(python_dll, sqlite_dll)
    except Exception, e:
        Log.warning("could not upgrade python's sqlite", cause=e)
Example #5
    def test_read_home(self):
        file = "~/___test_file.json"
        source = "tests/resources/json_ref/simple.json"
        File.copy(File(source), File(file))
        content = jsons.ref.get("file://"+file)

        try:
            self.assertEqual(
                content,
                {"test_key": "test_value"}
            )
        finally:
            File(file).delete()
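
The File wrapper used throughout this listing supports a plain write/read round trip; a minimal sketch using only methods that already appear in these examples (the path is hypothetical):

def _file_round_trip():
    # SKETCH ONLY: write/read/exists/delete AS SHOWN IN THE EXAMPLES ABOVE
    f = File("./results/temp/___scratch.json")
    f.write('{"test_key": "test_value"}')
    assert f.exists
    content = f.read()
    f.delete()
    return content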
Example #6
def read_settings(filename=None, defs=None):
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            Log.error("Can not find settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = argparse(defs)
        settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
        settings.args = args
        return settings
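
A minimal invocation sketch for read_settings above (the extra defs entry is hypothetical; settings.args is filled from the command line as shown):

settings = read_settings(filename="./settings.json")
# OR LET THE DEFAULT --settings FLAG LOCATE THE FILE:
settings = read_settings(defs=[{
    "name": ["--quiet"],
    "help": "suppress logging",
    "type": str,
    "dest": "quiet",
    "required": False
}])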
Example #7
def path2fullpath(path):
    fullpath = "file:///" + File(path).abspath.replace("\\", "/")
    if fullpath.find("#") >= 0:
        fullpath = fullpath.replace("#", "#log=" + LOG_DIV + "&")
    else:
        fullpath = fullpath + "#log=" + LOG_DIV
    return fullpath
Example #8
 def __init__(self, settings):
     self.settings = wrap({"host": "fake", "index": "fake"})
     self.filename = settings.filename
     try:
         self.data = convert.json2value(File(self.filename).read())
     except IOError:
         self.data = Dict()
Example #9
 def __init__(self, filename, host="fake", index="fake", settings=None):
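     # NOTE: THE filename PARAMETER IS IGNORED; settings.filename IS USED INSTEAD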
     self.settings = settings
     self.filename = settings.filename
     try:
         self.data = convert.json2value(File(self.filename).read())
     except Exception:
         self.data = Dict()
Example #10
def get_file(ref, url):
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
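            # E.G. url.path="a/b/c.json", ref.path="../d.json" -> num_dot=2, parent=["a"], ref.path="a/d.json"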
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception, e:
        content = None
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)
Example #11
def _run_remote(command, name):
    File("./results/temp/" + name +
         ".sh").write("nohup " + command +
                      " >& /dev/null < /dev/null &\nsleep 20")
    put("./results/temp/" + name + ".sh", "" + name + ".sh")
    run("chmod u+x " + name + ".sh")
    run("./" + name + ".sh")
Example #12
def _get_attr(obj, path):
    if not path:
        return obj

    attr_name = path[0]

    if isinstance(obj, ModuleType):
        if attr_name in obj.__dict__:
            return _get_attr(obj.__dict__[attr_name], path[1:])
        elif attr_name in dir(obj):
            return _get_attr(obj[attr_name], path[1:])

        # TRY FILESYSTEM
        from pyLibrary.env.files import File
        possible_error = None
        if File.new_instance(File(obj.__file__).parent,
                             attr_name).set_extension("py").exists:
            try:
                # THIS CASE IS WHEN THE __init__.py DOES NOT IMPORT THE SUBDIR FILE
                # WE CAN STILL PUT THE PATH TO THE FILE IN THE from CLAUSE
                if len(path) == 1:
                    # GET MODULE OBJECT
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[0]], 0)
                    return output
                else:
                    # GET VARIABLE IN MODULE
                    output = __import__(obj.__name__ + "." + attr_name,
                                        globals(), locals(), [path[1]], 0)
                    return _get_attr(output, path[1:])
            except Exception, e:
                from pyLibrary.debugs.exceptions import Except
                possible_error = Except.wrap(e)

        # TRY A CASE-INSENSITIVE MATCH
        attr_name = lower_match(attr_name, dir(obj))
        if not attr_name:
            from pyLibrary.debugs.logs import Log
            Log.warning(PATH_NOT_FOUND + ". Returning None.",
                        cause=possible_error)
        elif len(attr_name) > 1:
            from pyLibrary.debugs.logs import Log
            Log.error(AMBIGUOUS_PATH_FOUND + " {{paths}}", paths=attr_name)
        else:
            return _get_attr(obj[attr_name[0]], path[1:])
Example #13
def _start_es():
    File("./results/temp/start_es.sh").write(
        "nohup ./bin/elasticsearch >& /dev/null < /dev/null &\nsleep 20")
    with cd("/home/ec2-user/"):
        put("./results/temp/start_es.sh", "start_es.sh")
        run("chmod u+x start_es.sh")

    with cd("/usr/local/elasticsearch/"):
        sudo("/home/ec2-user/start_es.sh")
Example #14
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")
Example #15
    def run_job(self, job):
        process = Process(name=job.name,
                          params=job.command,
                          cwd=job.directory,
                          env=job.environment)

        # DIRECT OUTPUT TO FILES
        self.add_file(
            process.stdout,
            coalesce(job.stdout,
                     File.new_instance(self.settings.log.directory, job.name)))
Example #16
    def test_51586(self):
        debug_settings = {
            "trace": True,
            "cprofile": {
                "enabled": True,
                "filename": "tests/results/test_51586_profile.tab"
            }
        }
        Log.start(debug_settings)

        source_key = "51586_5124145.52"
        content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
        source = Dict(read_lines=lambda: GzipLines(content))
        with Accumulator(
                File("tests/results/51586_5124145.52.json")) as destination:
            with Timer("ETL file"):
                process_unittest_in_s3(source_key,
                                       source,
                                       destination,
                                       please_stop=None)
        Log.stop()
Example #17
    def extend(self, records):
        """
        JUST SO WE MODEL A Queue
        """
        records = {v["id"]: v["value"] for v in records}

        unwrap(self.data).update(records)

        data_as_json = convert.value2json(self.data, pretty=True)

        File(self.filename).write(data_as_json)
        Log.note("{{num}} documents added", num=len(records))
Example #18
def main(settings):
    current_time = datetime.utcnow()
    time_file = File(settings.param.last_replication_time)

    # SYNCH WITH source ES INDEX
    source = Index(settings.source)
    destination = Cluster(settings.destination).get_or_create_index(settings.destination)

    # GET LAST UPDATED
    from_file = None
    if time_file.exists:
        from_file = convert.milli2datetime(convert.value2int(time_file.read()))
    from_es = get_last_updated(destination) - timedelta(hours=1)
    last_updated = MIN(coalesce(from_file, convert.milli2datetime(0)), from_es)
    Log.note("updating records with modified_ts>={{last_updated}}", {"last_updated": last_updated})

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, batch_size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    time_file.write(unicode(convert.datetime2milli(current_time)))
Example #19
def write_profile(profile_settings, stats):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [{
        "num_calls": d[1],
        "self_time": d[2],
        "total_time": d[3],
        "self_time_per_call": d[2] / d[1],
        "total_time_per_call": d[3] / d[1],
        "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
        "line": f[1],
        "method": f[2].lstrip("<").rstrip(">")
    } for f, d in acc.stats.iteritems()]
    stats_file = File(profile_settings.filename,
                      suffix=convert.datetime2string(datetime.now(),
                                                     "_%Y%m%d_%H%M%S"))
    stats_file.write(convert.list2tab(stats))
Example #20
def write_profile(profile_settings, stats):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    acc = stats[0]
    for s in stats[1:]:
        acc.add(s)

    stats = [{
        "num_calls": d[1],
        "self_time": d[2],
        "total_time": d[3],
        "self_time_per_call": d[2] / d[1],
        "total_time_per_call": d[3] / d[1],
        "file": (f[0] if f[0] != "~" else "").replace("\\", "/"),
        "line": f[1],
        "method": f[2].lstrip("<").rstrip(">")
    }
        for f, d in acc.stats.iteritems()
    ]
    stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    stats_file.write(convert.list2tab(stats))
Example #21
def _read_file(filename):
    try:
        file = File.new_instance(STATIC_DIRECTORY, filename)
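        # REJECT PATHS THAT RESOLVE OUTSIDE THE STATIC DIRECTORY (PATH-TRAVERSAL GUARD)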
        if not file.abspath.startswith(STATIC_DIRECTORY.abspath):
            return "", 404, "text/html"

        Log.note("Read {{file}}", file=file.abspath)
        mimetype, encoding = mimetypes.guess_type(file.extension)
        if not mimetype:
            mimetype = "text/html"
        return file.read_bytes(), 200, mimetype
    except Exception:
        return "", 404, "text/html"
Example #22
def write(profile_settings):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [{
        "description": p.description,
        "num_calls": p.stats.count,
        "total_time": p.stats.count * p.stats.mean,
        "total_time_per_call": p.stats.mean
    } for p in profs if p.stats.count > 0]
    stats_file = File(profile_settings.filename,
                      suffix=convert.datetime2string(datetime.now(),
                                                     "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(convert.list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(profile_settings.filename,
                       suffix=convert.datetime2string(datetime.now(),
                                                      "_series_%Y%m%d_%H%M%S"))
    if not profs:
        return

    max_samples = MAX([len(p.samples) for p in profs if p.samples])
    if not max_samples:
        return

    r = range(max_samples)
    profs.insert(0, Dict(description="index", samples=r))
    stats = [{p.description: wrap(p.samples)[i]
              for p in profs if p.samples} for i in r]
    if stats:
        stats_file2.write(convert.list2tab(stats))
Example #23
 def _worker(self, please_stop):
     if Sqlite.canonical:
         self.db = Sqlite.canonical
     else:
         self.db = sqlite3.connect(':memory:')
         try:
             full_path = File(
                 "pyLibrary/vendor/sqlite/libsqlitefunctions.so").abspath
             # self.db.execute("SELECT sqlite3_enable_load_extension(1)")
             self.db.enable_load_extension(True)
             self.db.execute("SELECT load_extension('" + full_path + "')")
         except Exception, e:
             Log.warning(
                 "loading sqlite extension functions failed, doing without. (no SQRT for you!)",
                 cause=e)
Example #24
 def execute_file(filename,
                  host,
                  username,
                  password,
                  schema=None,
                  param=None,
                  ignore_errors=False,
                  settings=None):
     # MySQLdb provides no way to execute an entire SQL file in bulk, so we
     # have to shell out to the commandline client.
     sql = File(filename).read()
     if ignore_errors:
         try:
             MySQL.execute_sql(sql=sql, param=param, settings=settings)
         except Exception, e:
             pass
Example #25
def _upgrade():
    global _upgraded
    _upgraded = True
    try:
        import sys

        sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
        if python_dll.read_bytes() != sqlite_dll.read_bytes():
            backup = sqlite_dll.backup()
            File.copy(python_dll, sqlite_dll)
    except Exception, e:
        Log.warning("could not upgrade python's sqlite", cause=e)
    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " +
                         self.file.name)
        self.please_stop = Signal()
        self.db = Dict()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                try:
                    delta = convert.json2value(line)
                    apply_delta(self.db, delta)
                except:
                    pass
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                try:
                    if k != "status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                except Exception:
                    pass  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost", num=lost)

            if DEBUG:
                Log.note("Persistent queue {{name}} found with {{num}} items",
                         name=self.file.abspath,
                         num=len(self))
        else:
            self.db.status = Dict(start=0, end=0)
            self.start = self.db.status.start
            if DEBUG:
                Log.note("New persistent queue {{name}}",
                         name=self.file.abspath)
Example #27
    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " + self.file.name)
        self.please_stop = Signal()
        self.db = Dict()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                try:
                    delta = convert.json2value(line)
                    apply_delta(self.db, delta)
                except:
                    pass
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                try:
                    if k!="status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                except Exception:
                    pass  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost",  num= lost)

            if DEBUG:
                Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
        else:
            self.db.status = Dict(
                start=0,
                end=0
            )
            self.start = self.db.status.start
            if DEBUG:
                Log.note("New persistent queue {{name}}", name=self.file.abspath)
Example #28
def _get_attr(obj, path):
    if not path:
        return obj

    attr_name = path[0]

    if isinstance(obj, ModuleType):
        if attr_name in obj.__dict__:
            return _get_attr(obj.__dict__[attr_name], path[1:])
        elif attr_name in dir(obj):
            return _get_attr(obj[attr_name], path[1:])

        # TRY FILESYSTEM
        from pyLibrary.env.files import File

        if File.new_instance(File(obj.__file__).parent, attr_name).set_extension("py").exists:
            try:
                # THIS CASE IS WHEN THE __init__.py DOES NOT IMPORT THE SUBDIR FILE
                # WE CAN STILL PUT THE PATH TO THE FILE IN THE from CLAUSE
                if len(path) == 1:
                    # GET MODULE OBJECT
                    output = __import__(obj.__name__ + "." + attr_name, globals(), locals(), [path[0]], 0)
                    return output
                else:
                    # GET VARIABLE IN MODULE
                    output = __import__(obj.__name__ + "." + attr_name, globals(), locals(), [path[1]], 0)
                    return _get_attr(output, path[1:])
            except Exception, e:
                pass

        # TRY A CASE-INSENSITIVE MATCH
        attr_name = lower_match(attr_name, dir(obj))
        if not attr_name:
            from pyLibrary.debugs.logs import Log

            Log.error(PATH_NOT_FOUND)
        elif len(attr_name) > 1:
            from pyLibrary.debugs.logs import Log

            Log.error(AMBIGUOUS_PATH_FOUND + " {{paths}}", paths=attr_name)
        else:
            return _get_attr(obj[attr_name[0]], path[1:])
Example #29
def main():
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

        sq = elasticsearch.Index(settings=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        # SERIALIZE EACH HIT TO JSON BEFORE WRITING
        File(config.args.file).write("".join(
            map(convert.value2json, result.hits.hits)))

    except Exception, e:
        Log.error("Problem with etl", e)
Example #30
def write(profile_settings):
    from pyLibrary import convert
    from pyLibrary.env.files import File

    profs = list(profiles.values())
    for p in profs:
        p.stats = p.stats.end()

    stats = [
        {
            "description": p.description,
            "num_calls": p.stats.count,
            "total_time": p.stats.count * p.stats.mean,
            "total_time_per_call": p.stats.mean,
        }
        for p in profs
        if p.stats.count > 0
    ]
    stats_file = File(profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_%Y%m%d_%H%M%S"))
    if stats:
        stats_file.write(convert.list2tab(stats))
    else:
        stats_file.write("<no profiles>")

    stats_file2 = File(
        profile_settings.filename, suffix=convert.datetime2string(datetime.now(), "_series_%Y%m%d_%H%M%S")
    )
    if not profs:
        return

    max_samples = MAX([len(p.samples) for p in profs if p.samples])
    if not max_samples:
        return

    r = range(max_samples)
    profs.insert(0, Dict(description="index", samples=r))
    stats = [{p.description: wrap(p.samples)[i] for p in profs if p.samples} for i in r]
    if stats:
        stats_file2.write(convert.list2tab(stats))
Example #31
class TextLog_usingFile(TextLog):
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")

    def write(self, template, params):
        with self.file_lock:
            self.file.append(expand_template(template, params))
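
A short usage sketch for the file-backed log writer above (the log path is hypothetical; write() expands {{name}}-style templates as in the other examples):

log = TextLog_usingFile("./results/logs/app.log")
log.write("hello {{name}}", {"name": "world"})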
Example #32
class TextLog_usingFile(TextLog):
    def __init__(self, file):
        assert file

        from pyLibrary.env.files import File

        self.file = File(file)
        if self.file.exists:
            self.file.backup()
            self.file.delete()

        self.file_lock = Lock("file lock for logging")

    def write(self, template, params):
        with self.file_lock:
            self.file.append(expand_template(template, params))
Example #33
 def execute_file(self, filename, param=None):
     content = File(filename).read()
     self.execute(content, param)
Example #34
def main():
    global BATCH_SIZE

    current_time = Date.now()
    time_file = File(config.last_replication_time)

    # SYNCH WITH source ES INDEX
    source = elasticsearch.Index(config.source)
    destination = elasticsearch.Cluster(config.destination).get_or_create_index(config.destination)

    # GET LAST UPDATED
    if config.since != None:
        last_updated = Date(config.since).unix
    else:
        last_updated = get_last_updated(destination)

    if config.batch_size:
        BATCH_SIZE = config.batch_size

    Log.note("updating records with {{primary_field}}>={{last_updated}}", last_updated=last_updated,
             primary_field=config.primary_field)

    please_stop = Signal()
    done = Signal()

    def worker(please_stop):
        pending = Queue("pending ids", max=BATCH_SIZE*3, silent=False)

        pending_thread = Thread.run(
            "get pending",
            get_pending,
            source=source,
            since=last_updated,
            pending_bugs=pending,
            please_stop=please_stop
        )
        diff_thread = Thread.run(
            "diff",
            diff,
            source,
            destination,
            pending,
            please_stop=please_stop
        )
        replication_thread = Thread.run(
            "replication",
            replicate,
            source,
            destination,
            pending,
            config.fix,
            please_stop=please_stop
        )
        pending_thread.join()
        diff_thread.join()
        pending.add(Thread.STOP)
        replication_thread.join()
        done.go()
        please_stop.go()

    Thread.run("wait for replication to finish", worker, please_stop=please_stop)
    Thread.wait_for_shutdown_signal(please_stop=please_stop)

    if done:
        Log.note("done all")
        # RECORD LAST UPDATED< IF WE DID NOT CANCEL OUT
        time_file.write(unicode(current_time.milli))
Example #35
                        finally:
                            signal.go()
                    else:
                        try:
                            self.db.execute(command)
                        except Exception, e:
                            e = Except.wrap(e)
                            e.cause = Except(
                                type=ERROR,
                                template="Bad call to Sqlite",
                                trace=trace
                            )
                            Log.warning("Failure to execute", cause=e)

        except Exception, e:
            Log.error("Problem with sql thread", e)
        finally:
            self.db.close()


try:
    import sys

    sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
    python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
    if python_dll.read_bytes() != sqlite_dll.read_bytes():
        backup = sqlite_dll.backup()
        File.copy(python_dll, sqlite_dll)
except Exception, e:
    Log.warning("could not upgrade python's sqlite", cause=e)
Example #36
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--bucket"],
            "help": "bucket to reprocess",
            "type": str,
            "dest": "bucket",
            "required": True
        }, {
            "name": ["--begin", "--start"],
            "help": "lowest key (or prefix) to reprocess",
            "type": str,
            "dest": "start",
            "default": "1",
            "required": False
        }, {
            "name": ["--end", "--stop"],
            "help": "highest key (or prefix) to reprocess",
            "type": str,
            "dest": "end",
            "default": None,
            "required": False
        }, {
            "name": ["--file"],
            "help": "path to file with CR-delimited prefix list",
            "type": str,
            "dest": "file",
            "default": None,
            "required": False
        }])
        Log.start(settings.debug)

        with aws.Queue(settings.work_queue) as work_queue:
            source = Connection(settings.aws).get_bucket(settings.args.bucket)

            if settings.args.file:
                now = Date.now()
                for prefix in File(settings.args.file):
                    all_keys = source.keys(prefix=key_prefix(prefix))
                    for k in all_keys:
                        Log.note("Adding {{key}}", key=k)
                        work_queue.add({
                            "bucket": settings.args.bucket,
                            "key": k,
                            "timestamp": now.unix,
                            "date/time": now.format()
                        })
                return

            if settings.args.end and settings.args.start:
                up_to = str(int(settings.args.end) - 1)
                prefix = strings.common_prefix(settings.args.start, up_to)
            else:
                prefix = None
            start = Version(settings.args.start)
            end = Version(settings.args.end)

            all_keys = source.keys(prefix=prefix)
            with Timer("filtering {{num}} keys", {"num": len(all_keys)}):
                all_keys = [(k, Version(k)) for k in all_keys
                            if k.find("None") == -1]
                all_keys = [(k, p) for k, p in all_keys if start <= p < end]
            with Timer("sorting {{num}} keys", {"num": len(all_keys)}):
                all_keys = qb.sort(all_keys, 1)
            for k, p in all_keys:
                Log.note("Adding {{key}}", key=k)
                now = Date.now()
                work_queue.add({
                    "bucket": settings.args.bucket,
                    "key": k,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })

    except Exception, e:
        Log.error("Problem with etl", e)
def make_test_instance(name, settings):
    if settings.filename:
        File(settings.filename).delete()
    return open_test_instance(name, settings)
Example #38
from pyLibrary import convert
from pyLibrary.debugs import startup
from pyLibrary.maths.randoms import Random
from pyLibrary.sql.mysql import MySQL
from pyLibrary.env.files import File
from pyLibrary.debugs.logs import Log
from pyLibrary.queries import qb
from pyLibrary.strings import between
from pyLibrary.dot import coalesce, wrap
from pyLibrary.thread.multithread import Multithread
from pyLibrary.times.timer import Timer

DEBUG = True

TEMPLATE_FILE = File(
    "C:/Users/klahnakoski/git/datazilla-alerts/tests/resources/hg/changeset_nofiles.template"
)


def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url,
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
Example #39
    assert settings["class"]

    # IMPORT MODULE FOR HANDLER
    path = settings["class"].split(".")
    class_name = path[-1]
    path = ".".join(path[:-1])
    constructor = None
    try:
        temp = __import__(path, globals(), locals(), [class_name], -1)
        constructor = object.__getattribute__(temp, class_name)
    except Exception, e:
        if settings.stream and not constructor:
            # PROVIDE A DEFAULT STREAM HANDLER
            constructor = TextLog_usingThreadedStream
        else:
            Log.error("Can not find class {{class}}",  {"class": path}, cause=e)

    # IF WE NEED A FILE, MAKE SURE DIRECTORY EXISTS
    if settings.filename:
        from pyLibrary.env.files import File

        f = File(settings.filename)
        if not f.parent.exists:
            f.parent.create()

    settings['class'] = None
    params = unwrap(settings)
    log_instance = constructor(**params)
    return log_instance

Example #40
def get_changesets(date_range=None, revision_range=None, repo=None):
    # GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",   # TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]

    if date_range is not None:
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(convert.datetime2unix(
                    date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(
                    convert.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(convert.datetime2unix(
                    date_range.min)) + " 0 to " + unicode(
                        convert.datetime2unix(date_range.max) - 1) + " 0"

        args.extend(["--date", drange])

    if revision_range is not None:
        args.extend(
            ["-r",
             str(revision_range.min) + ":" + str(revision_range.max)])

    proc = subprocess.Popen(args,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            bufsize=-1)

    def iterator():
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error(
                                "Unable to pull hg log: return code {{return_code}}",
                                {"return_code": proc.returncode})
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (date, node, rev, author, branches, files, file_adds,
                 file_dels, p1rev, p1node, parents, children, tags,
                 desc) = (urllib.unquote(c) for c in line.split("\t"))

                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repos": repo.name,
                    "date": convert.unix2datetime(convert.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    "parents": set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(
                    e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})",
                          {"message": line}, e)

            Log.error("Problem iterating through log", e)
Example #41
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url,
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error(
                            "Can not clone {{repos.url}}, because {{problem}}",
                            {
                                "repos": repo,
                                "problem": line
                            })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()

    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd",
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error(
                    "Problem pulling repos, try \"hg recover\"\n{{reason|indent}}",
                    {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}",
                          {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}",
                     {"pull_results": output})
Example #42
from active_data import record_request, cors_wrapper
from active_data.actions import save_query
from active_data.actions.json import get_raw_json
from active_data.actions.query import query
from active_data.actions.save_query import SaveQueries, find_query
from active_data.actions.static import download
from pyLibrary import convert
from pyLibrary.debugs import constants, startup
from pyLibrary.debugs.logs import Log
from pyLibrary.env import elasticsearch
from pyLibrary.env.files import File
from pyLibrary.queries import containers
from pyLibrary.queries.meta import FromESMetadata
from pyLibrary.thread.threads import Thread

OVERVIEW = File("active_data/public/index.html").read()

app = Flask(__name__)
config = None


@app.route('/', defaults={'path': ''}, methods=['OPTIONS', 'HEAD'])
@app.route('/<path:path>', methods=['OPTIONS', 'HEAD'])
@cors_wrapper
def _head(path):
    return Response(b'', status=200)


app.add_url_rule('/tools/<path:filename>', None, download)
app.add_url_rule('/find/<path:hash>', None, find_query)
app.add_url_rule('/query',