Example #1
0
def run(queue_filename, output_dir, sequence_fn):
    """Run ``sequence_fn`` once for every active entry of a JSON queue file.

    Args:
        queue_filename: Path of a JSON file holding a list of entries. Each
            entry is read for ``'filename'`` and ``'shape'`` and, optionally,
            ``'active'`` (default ``True``), ``'x_symmetry_mode'`` and
            ``'y_symmetry_mode'`` (both default ``'SYMMETRY_OFF'``).
        output_dir: Directory that receives ``log.txt``; created if missing.
        sequence_fn: Callable invoked as
            ``sequence_fn(sandpile, filename, symmetry_modes)``.
    """
    with open(queue_filename) as queue_file:
        entries = json.load(queue_file)

    os.makedirs(output_dir, exist_ok=True)
    log_path = os.path.join(output_dir, 'log.txt')
    mode_keys = ('x_symmetry_mode', 'y_symmetry_mode')

    with Tee(log_path, 'w+'):
        for entry in entries:
            # Entries explicitly flagged as inactive are ignored.
            if entry.get('active', True):
                # Symmetry modes are looked up by member name, x first.
                modes = tuple(
                    SymmetryMode[entry.get(mode_key, 'SYMMETRY_OFF')]
                    for mode_key in mode_keys
                )

                name = entry['filename']
                print(name)

                pile = sandpiles.create_sandpile(
                    shape=tuple(entry['shape']),
                    symmetry_modes=modes,
                )
                sequence_fn(pile, name, modes)

                print('-' * 75)
Example #2
0
import numpy as np
import numpy.lib.recfunctions as rf
import matplotlib.pyplot as plt
import matplotlib as mpl

from npext import add_fields
from graph_new import FlowPointGraph, FlexibleGraph
#from propaguledispersal_corrected import IDTYPE
IDTYPE = "|S11" 
import saveobject
from hrprint import HierarchichalPrinter

from tee import Tee
from builtins import FileNotFoundError
# Hand the first command-line argument, when one is given, to Tee.
# NOTE(review): presumably the path of a log file that output is mirrored
# to -- confirm against tee.Tee.
if sys.argv[1:]:
    teeObject = Tee(sys.argv[1])

# Seeded without an argument, so this script does not fix the random state.
np.random.seed()
FILE_EXT = ".rn"

# Keyword arguments and switches for figure creation/layout.
FIGARGS = dict(figsize=(2.2, 2))
SHOWCAPTIONS = False
FIGADJUSTMENT = {"left": 0.2, "right": 0.99, "top": 0.99, "bottom": 0.15}

# Tick positions per plotted quantity. A value is either a plain list of
# ticks or a (ticks, pair) tuple.
# NOTE(review): the meaning of the trailing pair is not visible here --
# confirm where TICKS is consumed.
TICKS = {
    "mean dists": [600, 800, 1000],
    "time slowdown": [8, 13, 18],
    "mean paths": [0, 15, 30],
    "number unique candidates": ([10, 20, 30], (0.1, 0.2)),
    "time path": ([0, 7.5, 15], (0.1, 0.1)),
    "time total": [100, 400, 700],
}
Example #3
0
class mitm:
    """Decode, decrypt and log the events of one intercepted session.

    Events are dictionaries with a ``"type"`` key; ``handle_event`` lists the
    accepted types. The code relies on Python 2 string semantics
    (``unicode``, ``str.decode("hex")``).
    """

    BASE_DIR = os.path.dirname(os.path.abspath(__file__))

    # Message ids whose buffer is dumped as-is, without decryption.
    _UNENCRYPTED_IDS = frozenset({10100, 20100})
    # Message ids decrypted with Box(sk, serverkey) and a blake2b-derived
    # nonce; their plaintext carries the nonces / key used for later messages.
    _KEY_SETUP_IDS = frozenset({10101, 20104})

    session = None    # stored by __init__
    script = None     # receives non-error log lines via post_message()
    serverkey = None  # PublicKey built from the hard-coded hex string
    pk = None         # PublicKey taken from the first 32 bytes of message 10101
    sk = None         # PrivateKey delivered by the "keypair" event
    k = None          # Box decoded from message 20104, used for later messages
    snonce = None     # nonce advanced on every later "send" message
    rnonce = None     # nonce advanced on every later "recv" message
    tee = None        # Tee opened on the per-thread session log file

    def __init__(self, session, script):
        """Store the session/script handles and load the server public key."""
        self.session = session
        self.script = script
        self.serverkey = PublicKey("47d1416f3cf982d2b510cab32ecc4f1a04971345446cb1af326f304f63da6264".decode("hex"))

    def handle_event(self, event):
        """Process one event dictionary, updating state and logging it.

        Field values are first normalised in place (see ``_decode_fields``).
        Accepted ``event["type"]`` values are ``"socket"``, ``"keypair"``,
        ``"send"``, ``"recv"``, ``"closing"`` and ``"close"``.

        Raises:
            Exception: for an unknown event type, or when a message arrives
                before the key material needed to decrypt it is known.
        """
        self._decode_fields(event)
        event_type = event["type"]
        if event_type == "socket":
            self.tee = Tee(os.path.join(self.BASE_DIR, "session-{}.log".format(event["threadid"])))
            self.log("session started")
        elif event_type == "keypair":
            self.sk = PrivateKey(event["sk"])
            self.dump({"sk": self.sk}, function="PrivateKey")
        elif event_type in ("send", "recv"):
            self._handle_message(event)
        elif event_type == "closing":
            self.log("session closed")
        elif event_type == "close":
            # A "close" may arrive without a preceding "socket" event, in
            # which case no tee was ever opened.
            if self.tee is not None:
                self.tee.flush()
                self.tee.close()
        else:
            raise Exception("Invalid event type ({}).".format(event_type))

    def _decode_fields(self, event):
        """Convert the hex-encoded fields of ``event`` in place.

        ``"type"`` and ``"from"`` are left alone, ``"messageid"`` is parsed
        as a base-16 integer, booleans are kept, and every other string
        value is hex-decoded.
        """
        # Only values of existing keys are replaced, so iterating the dict
        # while assigning is safe.
        for key in event:
            if key in {"type", "from"}:
                continue
            value = event[key]
            if key == "messageid":
                event[key] = int(value, 16)
            elif isinstance(value, bool):
                continue
            elif isinstance(value, (str, unicode)):
                event[key] = value.decode("hex")

    def _handle_message(self, event):
        """Decrypt (when needed) and dump one "send"/"recv" event."""
        message_id = event["messageid"]
        if message_id in self._UNENCRYPTED_IDS:
            event.update({"message": event["buffer"]})
            self.dump(event)
            return

        # Guard clauses, checked in the same order as the key material
        # becomes available.
        if not self.serverkey:
            raise Exception("Missing server key ({}).".format(message_id))
        if not self.sk:
            raise Exception("Missing secret key ({}).".format(message_id))
        if message_id == 10101:
            # The first 32 bytes of this message are the public key; the
            # remainder is the ciphertext.
            self.pk = PublicKey(event["buffer"][:32])
            self.dump({"pk": bytes(self.pk)}, function="PublicKey")
            event["buffer"] = event["buffer"][32:]
        if not self.pk:
            raise Exception("Missing public key ({}).".format(message_id))
        key_setup = message_id in self._KEY_SETUP_IDS
        if not (message_id == 10101 or self.snonce):
            raise Exception("Missing client nonce ({}).".format(message_id))
        if not (key_setup or self.rnonce):
            raise Exception("Missing server nonce ({}).".format(message_id))
        if not (key_setup or self.k):
            raise Exception("Missing shared key ({}).".format(message_id))

        if key_setup:
            k, nonce = self._key_setup_box_and_nonce(message_id)
        else:
            k = self.k
            # Each direction keeps its own nonce, advanced before use.
            if event["type"] == "send":
                self.snonce = self.increment_nonce(self.snonce)
                nonce = self.snonce
            else:
                self.rnonce = self.increment_nonce(self.rnonce)
                nonce = self.rnonce

        ciphertext = event["buffer"]
        event.update({"k": k, "nonce": nonce, "ciphertext": ciphertext})
        try:
            message = k.decrypt(ciphertext, nonce)
        except Exception:
            self.dump(event, error=True)
            self.log("Warning: failed to decrypt {}".format(message_id), error=True)
            # Later messages depend on the key-setup ones, so a failure
            # there is fatal; any other message is only reported.
            if key_setup:
                raise
            return

        if message_id == 10101:
            self.snonce = message[24:48]
            self.dump({"snonce": self.snonce}, function="slice")
            message = message[48:]
        elif message_id == 20104:
            self.rnonce = message[:24]
            self.k = Box.decode(message[24:56])
            self.dump({"rnonce": self.rnonce, "k": self.k}, function="slice")
            message = message[56:]
        event.update({"message": message})
        self.dump(event)

    def _key_setup_box_and_nonce(self, message_id):
        """Return the ``(box, nonce)`` pair used for message 10101 or 20104.

        The nonce is the 24-byte blake2b digest of ``pk`` and ``serverkey``,
        prefixed with ``snonce`` for message 20104.
        """
        k = Box(self.sk, self.serverkey)
        self.dump({"s": k}, function="Box")
        b2 = blake2b(digest_size=24)
        if message_id == 20104:
            b2.update(bytes(self.snonce))
        b2.update(bytes(self.pk))
        b2.update(bytes(self.serverkey))
        nonce = b2.digest()
        fields = {"pk": self.pk, "serverkey": self.serverkey, "nonce": nonce}
        if message_id == 20104:
            fields["snonce"] = self.snonce
        self.dump(fields, function="blake2b")
        return k, nonce

    def increment_nonce(self, nonce):
        """Return ``nonce`` plus 2, treating it as a little-endian integer.

        The result always has the same length as ``nonce``. The earlier
        hex-string round trip dropped leading zeros, which produced an
        odd-length hex string or a shortened nonce whenever the last byte
        was below 0x10. Overflow wraps around.
        """
        buf = bytearray(nonce)
        carry = 2
        for index in range(len(buf)):
            carry += buf[index]
            buf[index] = carry & 0xFF
            carry >>= 8
            if not carry:
                break
        return bytes(buf)

    def log(self, message, error=False):
        """Print ``message`` when ``error`` is set, else post it to the script."""
        if error:
            # Parenthesised single argument: prints the same under the
            # Python 2 print statement and the print function.
            print(message)
        else:
            self.script.post_message({"type": "log", "message": message})

    def dump(self, event, function=None, error=False):
        """Log the known fields of ``event`` as an aligned, hex-encoded table.

        Args:
            event: Mapping of field names to values.
            function: Heading of the table; defaults to ``event["type"]``.
            error: Forwarded to ``log``; also right-aligns the heading.
        """
        if not function:
            function = event["type"]
        if error:
            function = function.rjust(31)
        lines = [function, "--------------------".rjust(31)]
        ordered = ["messageid", "snonce", "rnonce", "pk", "sk", "serverkey", "s", "k", "nonce", "message", "ciphertext"]
        skipped = ["from", "type", "buffer"]
        for key in ordered:
            if key not in event:
                continue
            value = event[key]
            if isinstance(value, (Box, PrivateKey, PublicKey)):
                value = bytes(value).encode("hex")
            elif isinstance(value, (dict, bool)):
                value = str(value)
            elif isinstance(value, (str, unicode)):
                value = value.encode("hex")
            lines.append("".join(["".rjust(15), key.ljust(20), str(value)]))
        lines.append("")
        self.log("\n".join(lines), error=error)
        # Report fields that are neither printed nor deliberately skipped;
        # sorted so the warning is deterministic.
        extra = set(event) - set(ordered) - set(skipped)
        if extra:
            self.log("Warning: Missed key(s) ({})".format(", ".join(sorted(extra))), error=error)
Example #4
0
 def handle_event(self, event):
     """Process one event dictionary, updating state and logging it.

     Field values are first normalised in place: ``"type"`` and ``"from"``
     are left alone, ``"messageid"`` is parsed as a base-16 integer,
     booleans are kept and every other string value is hex-decoded.
     Accepted ``event["type"]`` values are ``"socket"``, ``"keypair"``,
     ``"send"``, ``"recv"``, ``"closing"`` and ``"close"``.

     Raises:
         Exception: for an unknown event type, or when a message arrives
             before the key material needed to decrypt it is known.
     """
     # Only values of existing keys are replaced, so iterating the dict
     # while assigning is safe.
     for key in event:
         if key in {"type", "from"}:
             continue
         elif key == "messageid":
             event[key] = int(event[key], 16)
         elif isinstance(event[key], bool):
             continue
         elif isinstance(event[key], (str, unicode)):
             event[key] = event[key].decode("hex")

     event_type = event["type"]
     if event_type == "socket":
         self.tee = Tee(os.path.join(self.BASE_DIR, "session-{}.log".format(event["threadid"])))
         self.log("session started")
         return
     if event_type == "keypair":
         self.sk = PrivateKey(event["sk"])
         self.dump({"sk": self.sk}, function="PrivateKey")
         return
     if event_type == "closing":
         self.log("session closed")
         return
     if event_type == "close":
         # A "close" may arrive without a preceding "socket" event, in
         # which case no tee was ever opened.
         if self.tee is not None:
             self.tee.flush()
             self.tee.close()
         return
     if event_type not in ("send", "recv"):
         raise Exception("Invalid event type ({}).".format(event_type))

     message_id = event["messageid"]
     # These two message ids are dumped as-is, without decryption.
     if message_id in {10100, 20100}:
         event.update({"message": event["buffer"]})
         self.dump(event)
         return

     # Guard clauses, checked in the same order as the key material
     # becomes available.
     if not self.serverkey:
         raise Exception("Missing server key ({}).".format(message_id))
     if not self.sk:
         raise Exception("Missing secret key ({}).".format(message_id))
     if message_id == 10101:
         # The first 32 bytes of this message are the public key; the
         # remainder is the ciphertext.
         self.pk = PublicKey(event["buffer"][:32])
         self.dump({"pk": bytes(self.pk)}, function="PublicKey")
         event["buffer"] = event["buffer"][32:]
     if not self.pk:
         raise Exception("Missing public key ({}).".format(message_id))
     # 10101 and 20104 are decrypted with Box(sk, serverkey) and a
     # blake2b-derived nonce; their plaintext carries the nonces / key
     # used for every later message.
     key_setup = message_id in {10101, 20104}
     if not (message_id == 10101 or self.snonce):
         raise Exception("Missing client nonce ({}).".format(message_id))
     if not (key_setup or self.rnonce):
         raise Exception("Missing server nonce ({}).".format(message_id))
     if not (key_setup or self.k):
         raise Exception("Missing shared key ({}).".format(message_id))

     if key_setup:
         k = Box(self.sk, self.serverkey)
         self.dump({"s": k}, function="Box")
         b2 = blake2b(digest_size=24)
         if message_id == 20104:
             b2.update(bytes(self.snonce))
         b2.update(bytes(self.pk))
         b2.update(bytes(self.serverkey))
         nonce = b2.digest()
         fields = {"pk": self.pk, "serverkey": self.serverkey, "nonce": nonce}
         if message_id == 20104:
             fields["snonce"] = self.snonce
         self.dump(fields, function="blake2b")
     else:
         k = self.k
         # Each direction keeps its own nonce, advanced before use.
         if event_type == "send":
             self.snonce = self.increment_nonce(self.snonce)
             nonce = self.snonce
         else:
             self.rnonce = self.increment_nonce(self.rnonce)
             nonce = self.rnonce

     ciphertext = event["buffer"]
     event.update({"k": k, "nonce": nonce, "ciphertext": ciphertext})
     try:
         message = k.decrypt(ciphertext, nonce)
     except Exception:
         self.dump(event, error=True)
         self.log("Warning: failed to decrypt {}".format(message_id), error=True)
         # Later messages depend on the key-setup ones, so a failure there
         # is fatal; any other message is only reported.
         if key_setup:
             raise
         return

     if message_id == 10101:
         self.snonce = message[24:48]
         self.dump({"snonce": self.snonce}, function="slice")
         message = message[48:]
     elif message_id == 20104:
         self.rnonce = message[:24]
         self.k = Box.decode(message[24:56])
         self.dump({"rnonce": self.rnonce, "k": self.k}, function="slice")
         message = message[56:]
     event.update({"message": message})
     self.dump(event)
Example #5
0
def main(argv=None, return_report=False, regroup=False):
    if argv is None: # if argv is empty, fetch from the commandline
        argv = sys.argv[1:]
    elif isinstance(argv, _str): # else if argv is supplied but it's a simple string, we need to parse it to a list of arguments before handing to argparse or any other argument parser
        argv = shlex.split(argv) # Parse string just like argv using shlex

    # If --gui was specified, then there's a problem
    if len(argv) == 0 or '--gui' in argv:  # pragma: no cover
        raise Exception('--gui specified but an error happened with lib/gooey, cannot load the GUI (however you can still use this script in commandline). Check that lib/gooey exists and that you have wxpython installed. Here is the error: ')

    #==== COMMANDLINE PARSER ====

    #== Commandline description
    desc = '''Regex Path Matcher v%s
Description: Match paths using regular expression, and then generate a report. Can also substitute using regex to generate output paths. A copy mode is also provided to allow the copy of files from input to output paths.
This app is essentially a path matcher using regexp, and it then rewrites the path using regexp, so that you can reuse elements from input path to build the output path.
This is very useful to reorganize folders for experiments, where scripts/softwares expect a specific directories layout in order to work.

Advices
-------
- Filepath comparison: Paths are compared against filepaths, not just folders (but of course you can match folders with regex, but remember when designing your regexp that it will compared against files paths, not directories).
- Relative filepath: Paths are relative to the rootpath (except if --show-fullpath) and that they are always unix style, even on Windows (for consistency on all platforms and to easily reuse regexp).
- Partial matching: partial matching regex is accepted, so you don't need to model the full filepath, only the part you need (eg, 'myfile' will match '/myfolder/sub/myfile-034.mat').
- Unix filepaths: on all platforms, including Windows, paths will be in unix format (except if you set --show_fullpath). It makes things simpler for you to make crossplatform regex patterns.
- Use [^/]+ to match any file/folder in the filepath: because paths are always unix-like, you can use [^/]+ to match any part of the filepath. Eg, "([^/]+)/([^/]+)/data/mprage/.+\.(img|hdr|txt)" will match "UWS/John_Doe/data/mprage/12345_t1_mprage_98782.hdr".
- Split your big task in several smaller, simpler subtasks: instead of trying to do a regex that match T1, T2, DTI, everything at the same time, try to focus on only one modality at a time and execute them using multiple regex queries: eg, move first structural images, then functional images, then dti, etc. instead of all at once.
- Python module: this library can be used as a Python module to include in your scripts (just call `main(return_report=True)`).

Note: use --gui (without any other argument) to launch the experimental gui (needs Gooey library).

In addition to the switches provided below, using this program as a Python module also provides 2 additional options:
 - return_report = True to return as a variable the files matched and the report instead of saving in a file.
 - regroup = True will return the matched files (if return_report=True) in a tree structure of nested list/dicts depending on if the groups are named or not. Groups can also avoid being matched by using non-matching groups in regex.
    ''' % __version__
    ep = ''' '''

    #== Commandline arguments
    #-- Constructing the parser
    # Use GooeyParser if we want the GUI because it will provide better widgets
    if (len(argv) == 0 or '--gui' in argv) and not '--ignore-gooey' in argv:  # pragma: no cover
        # Initialize the Gooey parser
        main_parser = gooey.GooeyParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
        # Define Gooey widget types explicitly (because type auto-detection doesn't work quite well)
        widget_dir = {"widget": "DirChooser"}
        widget_filesave = {"widget": "FileSaver"}
        widget_file = {"widget": "FileChooser"}
        widget_text = {"widget": "TextField"}
    else: # Else in command-line usage, use the standard argparse
        # Delete the special argument to avoid unrecognized argument error in argparse
        if len(argv) > 0 and '--ignore-gooey' in argv[0]: argv.remove('--ignore-gooey') # this argument is automatically fed by Gooey when the user clicks on Start
        # Initialize the normal argparse parser
        main_parser = argparse.ArgumentParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
        # Define dummy dict to keep compatibile with command-line usage
        widget_dir = {}
        widget_filesave = {}
        widget_file = {}
        widget_text = {}

    # Required arguments
    main_parser.add_argument('-i', '--input', metavar='/some/path', type=str, required=True,
                        help='Path to the input folder', **widget_dir)
    main_parser.add_argument('-ri', '--regex_input', metavar=r'"sub[^/]+/(\d+)"', type=str, required=True,
                        help=r'Regex to match input paths. Must be defined relatively from --input folder. Do not forget to enclose it in double quotes (and not single)! To match any directory, use [^/\]*? or the alias \dir, or \dirnodot if you want to match folders in combination with --dir switch.', **widget_text)

    # Optional output/copy mode
    main_parser.add_argument('-o', '--output', metavar='/new/path', type=str, required=False, default=None,
                        help='Path to the output folder (where file will get copied over if --copy)', **widget_dir)
    main_parser.add_argument('-ro', '--regex_output', metavar=r'"newsub/\1"', type=str, required=False, default=None,
                        help='Regex to substitute input paths to convert to output paths. Must be defined relatively from --output folder. If not provided but --output is specified, will keep the same directory layout as input (useful to extract specific files without changing layout). Do not forget to enclose it in double quotes!', **widget_text)
    main_parser.add_argument('-c', '--copy', action='store_true', required=False, default=False,
                        help='Copy the matched input paths to the regex-substituted output paths.')
    main_parser.add_argument('-s', '--symlink', action='store_true', required=False, default=False,
                        help='Copy with a symbolic/soft link the matched input paths to the regex-substituted output paths (works only on Linux).')
    main_parser.add_argument('-m', '--move', action='store_true', required=False, default=False,
                        help='Move the matched input paths to the regex-substituted output paths.')
    main_parser.add_argument('--move_fast', action='store_true', required=False, default=False,
                        help='Move the matched input paths to the regex-substituted output paths, without checking first that the copy was done correctly.')
    main_parser.add_argument('-d', '--delete', action='store_true', required=False, default=False,
                        help='Delete the matched files.')

    # Optional general arguments
    main_parser.add_argument('-t', '--test', action='store_true', required=False, default=False,
                        help='Regex test mode: Stop after the first matched file and show the result of substitution. Useful to quickly check if the regex patterns are ok.')
    main_parser.add_argument('--dir', action='store_true', required=False, default=False,
                        help='Match directories too? (else only files are matched)')
    main_parser.add_argument('-y', '--yes', action='store_true', required=False, default=False,
                        help='Automatically accept the simulation and apply changes (good for batch processing and command chaining).')
    main_parser.add_argument('-f', '--force', action='store_true', required=False, default=False,
                        help='Force overwriting the target path already exists. Note that by default, if a file already exist, without this option, it won\'t get overwritten and no message will be displayed.')
    main_parser.add_argument('--show_fullpath', action='store_true', required=False, default=False,
                        help='Show full paths instead of relative paths in the simulation.')
    main_parser.add_argument('-ra', '--range', type=str, metavar='1:10-255', required=False, default=False,
                        help='Range mode: match only the files with filenames containing numbers in the specified range. The format is: (regex-match-group-id):(range-start)-(range-end). regex-match-group-id is the id of the regular expression that will contain the numbers that must be compared to the range. range-end is inclusive.')
    main_parser.add_argument('-re', '--regex_exists', metavar=r'"newsub/\1"', type=str, required=False, default=None,
                        help='Regex of output path to check if the matched regex here is matched prior writing output files.', **widget_text)
    main_parser.add_argument('--report', type=str, required=False, default='pathmatcher_report.txt', metavar='pathmatcher_report.txt',
                        help='Where to store the simulation report (default: pwd = current working dir).', **widget_filesave)
    main_parser.add_argument('--noreport', action='store_true', required=False, default=False,
                        help='Do not create a report file, print the report in the console.')
    main_parser.add_argument('--tree', action='store_true', required=False, default=False,
                        help='Regroup in a tree structure the matched files according to named and unnamed regex groups, and save the result as a json file (pathmatcher_tree.json).')
    main_parser.add_argument('-l', '--log', metavar='/some/folder/filename.log', type=str, required=False,
                        help='Path to the log file. (Output will be piped to both the stdout and the log file)', **widget_filesave)
    main_parser.add_argument('-v', '--verbose', action='store_true', required=False, default=False,
                        help='Verbose mode (show more output).')
    main_parser.add_argument('--silent', action='store_true', required=False, default=False,
                        help='No console output (but if --log specified, the log will still be saved in the specified file).')


    #== Parsing the arguments
    args = main_parser.parse_args(argv) # Storing all arguments to args
    
    #-- Set variables from arguments
    inputpath = args.input
    outputpath = args.output if args.output else None
    regex_input = args.regex_input
    regex_output = args.regex_output
    regex_exists = args.regex_exists
    copy_mode = args.copy
    symlink_mode = args.symlink
    move_mode = args.move
    movefast_mode = args.move_fast
    delete_mode = args.delete
    test_flag = args.test
    dir_flag = args.dir
    yes_flag = args.yes
    force = args.force
    only_missing = not force
    show_fullpath = args.show_fullpath
    path_range = args.range
    reportpath = args.report
    noreport = args.noreport
    tree_flag = args.tree
    verbose = args.verbose
    silent = args.silent

    # -- Sanity checks

    # First check if there is any input path, it's always needed
    if inputpath is None:
        raise NameError('No input path specified! Please specify one!')

	# Try to decode in unicode, else we will get issues down the way when outputting files
    try:
        inputpath = str(inputpath)
    except UnicodeDecodeError as exc:
        inputpath = str(inputpath, encoding=chardet.detect(inputpath)['encoding'])
    if outputpath:
        try:
            outputpath = str(outputpath)
        except UnicodeDecodeError as exc:
            outputpath = str(outputpath, encoding=chardet.detect(outputpath)['encoding'])

    # Remove trailing spaces
    inputpath = inputpath.strip()
    if outputpath:
        outputpath = outputpath.strip()

    # Input or output path is a URL (eg: file:///media/... on Ubuntu/Debian), then strip that out
    RE_urlprotocol = re.compile(r'^\w{2,}:[/\\]{2,}', re.I)
    if RE_urlprotocol.match(inputpath):
        inputpath = urllib.parse.unquote(inputpath).decode("utf8")  # first decode url encoded characters such as spaces %20
        inputpath = r'/' + RE_urlprotocol.sub(r'', inputpath)  # need to prepend the first '/' since it is probably an absolute path and here we will strip the whole protocol
    if outputpath and RE_urlprotocol.match(outputpath):
        outputpath = urllib.parse.unquote(outputpath).decode("utf8")
        outputpath = r'/' + RE_urlprotocol.sub(r'', outputpath)

    # Check if input/output paths exist, else might be a relative path, then convert to an absolute path
    rootfolderpath = inputpath if os.path.exists(inputpath) else fullpath(inputpath)
    rootoutpath = outputpath if outputpath is None or os.path.exists(outputpath) else fullpath(outputpath)

    # Single file specified instead of a folder: we define the input folder as the top parent of this file
    if os.path.isfile(inputpath): # if inputpath is a single file (instead of a folder), then define the rootfolderpath as the parent directory (for correct relative path generation, else it will also truncate the filename!)
        rootfolderpath = os.path.dirname(inputpath)
    if outputpath and os.path.isfile(outputpath): # if inputpath is a single file (instead of a folder), then define the rootfolderpath as the parent directory (for correct relative path generation, else it will also truncate the filename!)
        rootoutpath = os.path.dirname(outputpath)

    # Strip trailing slashes to ensure we correctly format paths afterward
    if rootfolderpath:
        rootfolderpath = rootfolderpath.rstrip('/\\')
    if rootoutpath:
        rootoutpath = rootoutpath.rstrip('/\\')

    # Final check of whether thepath exist
    if not os.path.isdir(rootfolderpath):
        raise NameError('Specified input path: %s (detected as %s) does not exist. Please check the specified path.' % (inputpath, rootfolderpath))

    # Check the modes are not conflicting
    if sum([1 if elt == True else 0 for elt in [copy_mode, symlink_mode, move_mode, movefast_mode, delete_mode]]) > 1:
        raise ValueError('Cannot set multiple modes simultaneously, please choose only one!')

    # Check if an output is needed and is not set
    if (copy_mode or symlink_mode or move_mode or movefast_mode) and not outputpath:
        raise ValueError('--copy or --symlink or --move or --move_fast specified but no --output !')

    # If tree mode enabled, enable also the regroup option
    if tree_flag:
        regroup = True

    # -- Configure the log file if enabled (ptee.write() will write to both stdout/console and to the log file)
    if args.log:
        ptee = Tee(args.log, 'a', nostdout=silent)
        #sys.stdout = Tee(args.log, 'a')
        sys.stderr = Tee(args.log, 'a', nostdout=silent)
    else:
        ptee = Tee(nostdout=silent)
    
    # -- Preprocess regular expression to add aliases
    # Directory alias
    regex_input = regex_input.replace('\dirnodot', r'[^\\/.]*?').replace('\dir', r'[^\\/]*?')
    regex_output = regex_output.replace('\dirnodot', r'[^\\/.]*?').replace('\dir', r'[^\\/]*?') if regex_output else regex_output
    regex_exists = regex_exists.replace('\dirnodot', r'[^\\/.]*?').replace('\dir', r'[^\\/]*?') if regex_exists else regex_exists

    #### Main program
    # Test if regular expressions are correct syntactically
    try:
        regin = re.compile(str_to_raw(regex_input))
        regout = re.compile(str_to_raw(regex_output)) if regex_output else None
        regexist = re.compile(str_to_raw(regex_exists)) if regex_exists else None
        if path_range:  # parse the range format
            temp = re.search(r'(\d+):(\d+)-(\d+)', path_range)
            prange = {"group": int(temp.group(1)), "start": int(temp.group(2)), "end": int(temp.group(3))}
            del temp
    except re.error as exc:
        ptee.write("Regular expression is not correct, please fix it! Here is the error stack:\n")
        ptee.write(traceback.format_exc())
        return 1

    ptee.write("== Regex Path Matcher started ==\n")
    ptee.write("Parameters:")
    ptee.write("- Input root: %s" % inputpath)
    ptee.write("- Input regex: %s" % regex_input)
    ptee.write("- Output root: %s" % outputpath)
    ptee.write("- Output regex: %s" % regex_output)
    ptee.write("- Full arguments: %s" % ' '.join(sys.argv))
    ptee.write("\n")

    # == FILES WALKING AND MATCHING/SUBSTITUTION STEP
    files_list = []  # "to copy" files list, stores the list of input files and their corresponding output path (computed using regex)
    files_list_regroup = {}  # files list regrouped, if regroup = True
    ptee.write("Computing paths matching and simulation report, please wait (total time depends on files count - filesize has no influence). Press CTRL+C to abort\n")
    for dirpath, filename in tqdm(recwalk(inputpath, topdown=False, folders=dir_flag), unit='files', leave=True, smoothing=0):
        # Get full absolute filepath and relative filepath from base dir
        filepath = os.path.join(dirpath, filename)
        relfilepath = path2unix(os.path.relpath(filepath, rootfolderpath)) # File relative path from the root (we truncate the rootfolderpath so that we can easily check the files later even if the absolute path is different)
        regin_match = regin.search(relfilepath)
        # Check if relative filepath matches the input regex
        if regin_match:  # Matched! We store it in the "to copy" files list
            # If range mode enabled, check if the numbers in the filepath are in the specified range, else we skip this file
            if path_range:
                curval = int(regin_match.group(prange['group']))
                if not (prange['start'] <= curval <= prange['end']):
                    continue
            # Compute the output filepath using output regex
            if outputpath:
                newfilepath = regin.sub(regex_output, relfilepath) if regex_output else relfilepath
                #fulloutpath = os.path.join(rootoutpath, newfilepath)
            else:
                newfilepath = None
                #fulloutpath = None
            # Check if output path exists (if argument is enabled)
            if regex_exists and newfilepath:
                if not os.path.exists(os.path.join(rootoutpath, regin.sub(regex_exists, relfilepath))):
                    # If not found, skip to the next file
                    if verbose or test_flag:
                        ptee.write("\rFile skipped because output does not exist: %s" % newfilepath)
                    continue
            # Store both paths into the "to copy" list
            files_list.append([relfilepath, newfilepath])
            if verbose or test_flag:  # Regex test mode or verbose: print the match
                ptee.write("\rMatch: %s %s %s\n" % (relfilepath, "-->" if newfilepath else "", newfilepath if newfilepath else ""))
                if test_flag:  # Regex test mode: break file walking after the first match
                    break
            # Store paths in a tree structure based on groups if regroup is enabled
            if regroup and regin_match.groups():
                curlevel = files_list_regroup  # current level in the tree
                parentlevel = curlevel  # parent level in the tree (necessary to modify the leaf, else there is no way to reference by pointer)
                lastg = 0  # last group key (to access the leaf)
                gdict = regin_match.groupdict()  # store the named groups, so we can pop as we consume it
                for g in regin_match.groups():
                    # For each group
                    if g is None:
                        # If group value is empty, just skip (ie, this is an optional group, this allow to specify multiple optional groups and build the tree accordingly)
                        continue
                    # Find if the current group value is in a named group, in this case we will also use the key name of the group followed by the value, and remove from dict (so that if there are multiple matching named groups with same value we don't lose them)
                    k, v, gdict = pop_first_namedgroup(gdict, g)
                    # If a named group is found, use the key followed by value as nodes
                    if k:
                        if not k in curlevel:
                            # Create node for group key/name
                            curlevel[k] = {}
                        if not g in curlevel[k]:
                            # Create subnode for group value
                            curlevel[k][g] = {}
                        # Memorize the parent level
                        parentlevel = curlevel[k]
                        lastg = g
                        # Memorize current level (step down one level for next iteration)
                        curlevel = curlevel[k][g]
                    # Else it is an unnamed group, use the value as the node name
                    else:
                        if not g in curlevel:
                            # Create node for group value
                            curlevel[g] = {}
                        # Memorize the parent level
                        parentlevel = curlevel
                        lastg = g
                        # Memorize current level (step down one level for next iteration)
                        curlevel = curlevel[g]
                # End of tree structure construction
                # Create the leaf if not done already, as a list
                if not parentlevel[lastg]:
                    parentlevel[lastg] = []
                # Append the value (so if there are multiple files matching the same structure, they will be appended in this list)
                parentlevel[lastg].append([relfilepath, newfilepath])
    ptee.write("End of simulation. %i files matched." % len(files_list))
    # Regex test mode: just quit after the first match
    if test_flag:
        if return_report:
            return files_list, None
        else:
            return 0

    # == SIMULATION REPORT STEP
    ptee.write("Preparing simulation report, please wait a few seconds...")

    # Initialize conflicts global flags
    conflict1_flag = False
    conflict2_flag = False

    # Show result in console using a Python implementation of MORE (because file list can be quite long)
    #more_display=More(num_lines=30)
    #"\n".join(map(str,files_list)) | more_display

    # Precompute conflict type 2 lookup table (= dict where each key is a output filepath, and the value the number of occurrences)
    outdict = {}
    for file_op in files_list:
        outdict[file_op[1]] = outdict.get(file_op[1], 0) + 1

    # Build and show simulation report in user's default text editor
    if noreport:
        reportfile = StringIO()
    else:
        reportfile = open(reportpath, 'w')
    try:
        reportfile.write("== REGEX PATH MATCHER SIMULATION REPORT ==\n")
        reportfile.write("Total number of files matched: %i\n" % len(files_list))
        reportfile.write("Parameters:\n")
        reportfile.write("- Input root: %s\n" % inputpath.encode('utf-8'))
        reportfile.write("- Input regex: %s\n" % regex_input)
        reportfile.write("- Output root: %s\n" % (outputpath.encode('utf-8') if outputpath else ''))
        reportfile.write("- Output regex: %s\n" % regex_output)
        reportfile.write("- Full arguments: %s" % ' '.join(sys.argv))
        reportfile.write("\r\n")
        reportfile.write("List of matched files:\n")
        for file_op in files_list:
            conflict1 = False
            conflict2 = False
            if outputpath:
                # Check if there was a conflict:
                # Type 1 - already existing output file (force overwrite?)
                fulloutpath = os.path.join(rootoutpath, file_op[1])
                if os.path.exists(fulloutpath):
                    conflict1 = True
                    conflict1_flag = True

                # Type 2 - two files will output with same name (bad regex)
                if outdict[file_op[1]] > 1:
                    conflict2 = True
                    conflict2_flag = True

            # Show relative or absolute paths?
            if show_fullpath:
                showinpath = os.path.join(rootfolderpath, file_op[0])
                showoutpath = os.path.join(rootoutpath, file_op[1]) if outputpath else None
            else:
                showinpath = file_op[0]
                showoutpath = file_op[1] if outputpath else None

            # Write into report file
            reportfile.write("* %s %s %s %s %s" % (showinpath, "-->" if (outputpath or delete_mode) else "", showoutpath if outputpath else "", "[ALREADY_EXIST]" if conflict1 else '', "[CONFLICT]" if conflict2 else ''))
            reportfile.write("\n")
        if noreport:
            reportfile.seek(0)
            print(reportfile.read())
    finally:
        try:
            reportfile.close()
        except ValueError as exc:
            pass
    # Open the simulation report with the system's default text editor
    if not (yes_flag or return_report or noreport):  # if --yes is supplied, just skip question and apply!
        ptee.write("Opening simulation report with your default editor, a new window should open.")
        open_with_default_app(reportpath)

    # == COPY/MOVE STEP
    if files_list and ( delete_mode or ((copy_mode or symlink_mode or move_mode or movefast_mode) and outputpath) ):
        # -- USER NOTIFICATION AND VALIDATION
        # Notify user of conflicts
        ptee.write("\n")
        if conflict1_flag:
            ptee.write("Warning: conflict type 1 (files already exist) has been detected. Please use --force if you want to overwrite them, else they will be skipped.\n")
        if conflict2_flag:
            ptee.write("Warning: conflict type 2 (collision) has been detected. If you continue, several files will have the same name due to the specified output regex (thus, some will be lost). You should cancel and check your regular expression for output.\n")
        if not conflict1_flag and not conflict2_flag:
            ptee.write("No conflict detected. You are good to go!")

        # Ask user if we should apply
        if not (yes_flag or return_report):  # if --yes is supplied, just skip question and apply!
            applycopy = input("Do you want to apply the result of the path reorganization simulation on %i files? [Y/N]: " % len(files_list))
            if applycopy.lower() != 'y':
                return 0

        # -- APPLY STEP
        ptee.write("Applying new path structure, please wait (total time depends on file sizes and matches count). Press CTRL+C to abort")
        for infilepath, outfilepath in tqdm(files_list, total=len(files_list), unit='files', leave=True):
            if verbose:
                ptee.write("%s --> %s" % (infilepath, outfilepath))
            # Copy the file! (User previously accepted to apply the simulation)
            fullinpath = os.path.join(rootfolderpath, infilepath)
            if outputpath:
                fulloutpath = os.path.join(rootoutpath, outfilepath)
                if movefast_mode:  # movefast: just move the file/directory tree
                    move_any(fullinpath, fulloutpath)
                else:  # else we first copy in any case, then delete old file if move_mode
                    copy_any(fullinpath, fulloutpath, only_missing=only_missing, symlink=True if symlink_mode else False)  # copy file
                    if move_mode:  # if move mode, then delete the old file. Copy/delete is safer than move because we can ensure that the file is fully copied (metadata/stats included) before deleting the old
                        remove_if_exist(fullinpath)
            if delete_mode:  # if delete mode, ensure that the original file is deleted!
                remove_if_exist(fullinpath)

    # == RETURN AND END OF MAIN
    ptee.write("Task done, quitting.")
    # Save the tree structure in a json file if --tree is enabled
    if tree_flag:
        with open('pathmatcher_tree.json', 'wb') as jsonout:
            jsonout.write(json.dumps(files_list_regroup, sort_keys=True, indent=4, separators=(',', ': ')))
        print('Tree structure saved in file pathmatcher_tree.json')
    # Script mode: return the matched files and their substitutions if available
    if return_report:
        if regroup:
            return files_list_regroup, [conflict1_flag, conflict2_flag]
        else:
            return files_list, [conflict1_flag, conflict2_flag]
    # Standalone mode: just return non error code
    else:
        return 0
Example #6
0
def get_radius(sandpile):
    """Return the number of rows from the first filled last-column cell down.

    The last column of the sandpile grid is scanned from row 0 downwards
    until a truthy (non-zero) cell is found; the result is the grid height
    (``sandpile.data.shape[0]``) minus that row index.

    NOTE(review): ``sandpile.data.get()`` presumably returns an indexable
    copy of the grid (e.g. a device-to-host transfer) -- confirm against
    the sandpile implementation.

    No bounds check is performed: if the last column holds no truthy cell,
    the scan indexes past the end of the array and whatever error the
    array type raises for that propagates to the caller.
    """
    grid = sandpile.data.get()

    # Walk down the last column until a non-zero cell shows up.
    top_row = 0
    while True:
        if grid[top_row, -1]:
            break
        top_row += 1

    height = sandpile.data.shape[0]
    return height - top_row


def get_next_best_radius(new_radius, multiple=8):
    """Round ``new_radius`` up to the nearest multiple of ``multiple``.

    A value that is already an exact multiple is returned unchanged.

    Args:
        new_radius: Radius to round up (integer arithmetic is intended).
        multiple: Positive step to align to. Defaults to 8, the value that
            used to be hard-coded, so existing callers are unaffected.

    Returns:
        The smallest multiple of ``multiple`` that is greater than or equal
        to ``new_radius``.

    Raises:
        ValueError: If ``multiple`` is zero or negative.
    """
    if multiple <= 0:
        raise ValueError('multiple must be positive, got %r' % (multiple,))
    # Adding (multiple - 1) before floor-dividing turns the floor into a
    # ceiling for integer inputs.
    return ((new_radius + multiple - 1) // multiple) * multiple


# Top-level script section: builds an initial 8x8 sandpile, saves it as step 0,
# then walks steps 1..target_radius looking for previously saved sandpiles.
# NOTE(review): Tee presumably mirrors console output into log.txt (opened
# here with mode 'a+') for the duration of the block -- confirm against Tee.
with Tee(os.path.join(output_dir, 'log.txt'), 'a+'):
    # The same symmetry mode is requested for both entries of the pair.
    symmetry_modes = (SymmetryMode.SYMMETRY_ON_WITH_OVERLAP,
                      SymmetryMode.SYMMETRY_ON_WITH_OVERLAP)

    sandpile = sandpiles.create_sandpile(shape=(8, 8),
                                         symmetry_modes=symmetry_modes)
    # Seed the cell in the last row / last column with the value 1.
    sandpile.data[-1, -1] = 1

    # NOTE(review): presumably persists the initial state under index 0 in
    # output_dir -- confirm against save().
    save(output_dir, 0, sandpile)

    for count in range(1, target_radius + 1):
        # Try to load a sandpile stored at output_dir/<count>; when one is
        # returned, adopt it as the current sandpile and go to the next step.
        saved_sandpile = sandpiles.try_load_sandpile(
            os.path.join(output_dir, str(count)))
        if saved_sandpile is not None:
            sandpile = saved_sandpile
            continue
        # NOTE(review): nothing is done when no saved sandpile is found; the
        # loop body looks truncated here -- verify against the full script.
def main(argv=None, return_report=False, regroup=False):
    if argv is None: # if argv is empty, fetch from the commandline
        argv = sys.argv[1:]
    elif isinstance(argv, _str): # else if argv is supplied but it's a simple string, we need to parse it to a list of arguments before handing to argparse or any other argument parser
        argv = shlex.split(argv) # Parse string just like argv using shlex

    # If --gui was specified, then there's a problem
    if len(argv) == 1 or '--gui' in argv:  # pragma: no cover
        print(exc)
        raise Exception('--gui specified but an error happened with lib/gooey, cannot load the GUI (however you can still use this script in commandline). Check that lib/gooey exists and that you have wxpython installed. Here is the error: ')

    #==== COMMANDLINE PARSER ====

    #== Commandline description
    desc = '''Regex Path Matcher v%s
Description: Match paths using regular expression, and then generate a report. Can also substitute using regex to generate output paths. A copy mode is also provided to allow the copy of files from input to output paths.
This app is essentially a path matcher using regexp, and it then rewrites the path using regexp, so that you can reuse elements from input path to build the output path.
This is very useful to reorganize folders for experiments, where scripts/softwares expect a specific directories layout in order to work.

Advices
-------
- Filepath comparison: Paths are compared against filepaths, not just folders (but of course you can match folders with regex, but remember when designing your regexp that it will compared against files paths, not directories).
- Relative filepath: Paths are relative to the rootpath (except if --show-fullpath) and that they are always unix style, even on Windows (for consistency on all platforms and to easily reuse regexp).
- Partial matching: partial matching regex is accepted, so you don't need to model the full filepath, only the part you need (eg, 'myfile' will match '/myfolder/sub/myfile-034.mat').
- Unix filepaths: on all platforms, including Windows, paths will be in unix format (except if you set --show_fullpath). It makes things simpler for you to make crossplatform regex patterns.
- Use [^/]+ to match any file/folder in the filepath: because paths are always unix-like, you can use [^/]+ to match any part of the filepath. Eg, "([^/]+)/([^/]+)/data/mprage/.+\.(img|hdr|txt)" will match "UWS/John_Doe/data/mprage/12345_t1_mprage_98782.hdr".
- Split your big task in several smaller, simpler subtasks: instead of trying to do a regex that match T1, T2, DTI, everything at the same time, try to focus on only one modality at a time and execute them using multiple regex queries: eg, move first structural images, then functional images, then dti, etc. instead of all at once.
- Python module: this library can be used as a Python module to include in your scripts (just call `main(return_report=True)`).

Note: use --gui (without any other argument) to launch the experimental gui (needs Gooey library).

In addition to the switches provided below, using this program as a Python module also provides 2 additional options:
 - return_report = True to return as a variable the files matched and the report instead of saving in a file.
 - regroup = True will return the matched files (if return_report=True) in a tree structure of nested list/dicts depending on if the groups are named or not. Groups can also avoid being matched by using non-matching groups in regex.
    ''' % __version__
    ep = ''' '''

    #== Commandline arguments
    #-- Constructing the parser
    # Use GooeyParser if we want the GUI because it will provide better widgets
    if (len(argv) == 0 or '--gui' in argv) and not '--ignore-gooey' in argv:  # pragma: no cover
        # Initialize the Gooey parser
        main_parser = gooey.GooeyParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
        # Define Gooey widget types explicitly (because type auto-detection doesn't work quite well)
        widget_dir = {"widget": "DirChooser"}
        widget_filesave = {"widget": "FileSaver"}
        widget_file = {"widget": "FileChooser"}
        widget_text = {"widget": "TextField"}
    else: # Else in command-line usage, use the standard argparse
        # Delete the special argument to avoid unrecognized argument error in argparse
        if len(argv) > 0 and '--ignore-gooey' in argv[0]: argv.remove('--ignore-gooey') # this argument is automatically fed by Gooey when the user clicks on Start
        # Initialize the normal argparse parser
        main_parser = argparse.ArgumentParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
        # Define dummy dict to keep compatibile with command-line usage
        widget_dir = {}
        widget_filesave = {}
        widget_file = {}
        widget_text = {}

    # Required arguments
    main_parser.add_argument('-i', '--input', metavar='/some/path', type=str, required=True,
                        help='Path to the input folder', **widget_dir)
    main_parser.add_argument('-ri', '--regex_input', metavar=r'"sub[^/]+/(\d+)"', type=str, required=True,
                        help=r'Regex to match input paths. Must be defined relatively from --input folder. Do not forget to enclose it in double quotes (and not single)! To match any directory, use [^/\]*? or the alias \dir.', **widget_text)

    # Optional output/copy mode
    main_parser.add_argument('-o', '--output', metavar='/new/path', type=str, required=False, default=None,
                        help='Path to the output folder (where file will get copied over if --copy)', **widget_dir)
    main_parser.add_argument('-ro', '--regex_output', metavar=r'"newsub/\1"', type=str, required=False, default=None,
                        help='Regex to substitute input paths to convert to output paths. Must be defined relatively from --output folder. If not provided but --output is specified, will keep the same directory layout as input (useful to extract specific files without changing layout). Do not forget to enclose it in double quotes!', **widget_text)
    main_parser.add_argument('-c', '--copy', action='store_true', required=False, default=False,
                        help='Copy the matched input paths to the regex-substituted output paths.')
    main_parser.add_argument('-s', '--symlink', action='store_true', required=False, default=False,
                        help='Copy with a symbolic/soft link the matched input paths to the regex-substituted output paths (works only on Linux).')
    main_parser.add_argument('-m', '--move', action='store_true', required=False, default=False,
                        help='Move the matched input paths to the regex-substituted output paths.')
    main_parser.add_argument('--move_fast', action='store_true', required=False, default=False,
                        help='Move the matched input paths to the regex-substituted output paths, without checking first that the copy was done correctly.')
    main_parser.add_argument('-d', '--delete', action='store_true', required=False, default=False,
                        help='Delete the matched files.')

    # Optional general arguments
    main_parser.add_argument('-t', '--test', action='store_true', required=False, default=False,
                        help='Regex test mode: Stop after the first matched file and show the result of substitution. Useful to quickly check if the regex patterns are ok.')
    main_parser.add_argument('-y', '--yes', action='store_true', required=False, default=False,
                        help='Automatically accept the simulation and apply changes (good for batch processing and command chaining).')
    main_parser.add_argument('-f', '--force', action='store_true', required=False, default=False,
                        help='Force overwriting the target path already exists. Note that by default, if a file already exist, without this option, it won\'t get overwritten and no message will be displayed.')
    main_parser.add_argument('--show_fullpath', action='store_true', required=False, default=False,
                        help='Show full paths instead of relative paths in the simulation.')
    main_parser.add_argument('-ra', '--range', type=str, metavar='1:10-255', required=False, default=False,
                        help='Range mode: match only the files with filenames containing numbers in the specified range. The format is: (regex-match-group-id):(range-start)-(range-end). regex-match-group-id is the id of the regular expression that will contain the numbers that must be compared to the range. range-end is inclusive.')
    main_parser.add_argument('--report', type=str, required=False, default='pathmatcher_report.txt', metavar='pathmatcher_report.txt',
                        help='Where to store the simulation report (default: pwd = current working dir).', **widget_filesave)
    main_parser.add_argument('-l', '--log', metavar='/some/folder/filename.log', type=str, required=False,
                        help='Path to the log file. (Output will be piped to both the stdout and the log file)', **widget_filesave)
    main_parser.add_argument('-v', '--verbose', action='store_true', required=False, default=False,
                        help='Verbose mode (show more output).')
    main_parser.add_argument('--silent', action='store_true', required=False, default=False,
                        help='No console output (but if --log specified, the log will still be saved in the specified file).')


    #== Parsing the arguments
    args = main_parser.parse_args(argv) # Storing all arguments to args
    
    #-- Set variables from arguments
    inputpath = args.input
    outputpath = args.output if args.output else None
    regex_input = args.regex_input
    regex_output = args.regex_output
    copy_mode = args.copy
    symlink_mode = args.symlink
    move_mode = args.move
    movefast_mode = args.move_fast
    delete_mode = args.delete
    test_flag = args.test
    yes_flag = args.yes
    force = args.force
    only_missing = not force
    show_fullpath = args.show_fullpath
    path_range = args.range
    reportpath = args.report
    verbose = args.verbose
    silent = args.silent

    # -- Sanity checks

    # First check if there is any input path, it's always needed
    if inputpath is None:
        raise NameError('No input path specified! Please specify one!')

    # Remove trailing spaces
    inputpath = inputpath.strip()
    if outputpath:
        outputpath = outputpath.strip()

    # Input or output path is a URL (eg: file:///media/... on Ubuntu/Debian), then strip that out
    RE_urlprotocol = re.compile(r'^\w{2,}:[/\\]{2,}', re.I)
    if RE_urlprotocol.match(inputpath):
        inputpath = urllib.unquote(inputpath).decode("utf8")  # first decode url encoded characters such as spaces %20
        inputpath = r'/' + RE_urlprotocol.sub(r'', inputpath)  # need to prepend the first '/' since it is probably an absolute path and here we will strip the whole protocol
    if outputpath and RE_urlprotocol.match(outputpath):
        outputpath = urllib.unquote(outputpath).decode("utf8")
        outputpath = r'/' + RE_urlprotocol.sub(r'', outputpath)

    # Check if input/output paths exist, else might be a relative path, then convert to an absolute path
    rootfolderpath = inputpath if os.path.exists(inputpath) else fullpath(inputpath)
    rootoutpath = outputpath if outputpath is None or os.path.exists(outputpath) else fullpath(outputpath)

    # Single file specified instead of a folder: we define the input folder as the top parent of this file
    if os.path.isfile(inputpath): # if inputpath is a single file (instead of a folder), then define the rootfolderpath as the parent directory (for correct relative path generation, else it will also truncate the filename!)
        rootfolderpath = os.path.dirname(inputpath)
    if outputpath and os.path.isfile(outputpath): # if inputpath is a single file (instead of a folder), then define the rootfolderpath as the parent directory (for correct relative path generation, else it will also truncate the filename!)
        rootoutpath = os.path.dirname(outputpath)

    # Strip trailing slashes to ensure we correctly format paths afterward
    if rootfolderpath:
        rootfolderpath = rootfolderpath.rstrip('/\\')
    if rootoutpath:
        rootoutpath = rootoutpath.rstrip('/\\')

    # Final check of whether thepath exist
    if not os.path.isdir(rootfolderpath):
        raise NameError('Specified input path: %s (detected as %s) does not exist. Please check the specified path.' % (inputpath, rootfolderpath))

    # Check the modes are not conflicting
    if sum([1 if elt == True else 0 for elt in [copy_mode, symlink_mode, move_mode, movefast_mode, delete_mode]]) > 1:
        raise ValueError('Cannot set multiple modes simultaneously, please choose only one!')

    # Check if an output is needed and is not set
    if (copy_mode or symlink_mode or move_mode or movefast_mode) and not outputpath:
        raise ValueError('--copy or --symlink or --move or --move_fast specified but no --output !')

    # -- Configure the log file if enabled (ptee.write() will write to both stdout/console and to the log file)
    if args.log:
        ptee = Tee(args.log, 'a', nostdout=silent)
        #sys.stdout = Tee(args.log, 'a')
        sys.stderr = Tee(args.log, 'a', nostdout=silent)
    else:
        ptee = Tee(nostdout=silent)
    
    # -- Preprocess regular expression to add aliases
    # Directory alias
    regex_input = regex_input.replace('\dir', r'[^\\/]*?')
    regex_output = regex_output.replace('\dir', r'[^\\/]*?') if regex_output else regex_output

    #### Main program
    # Test if regular expressions are correct syntactically
    try:
        regin = re.compile(str_to_raw(regex_input))
        regout = re.compile(str_to_raw(regex_output)) if regex_output else None
        if path_range:  # parse the range format
            temp = re.search(r'(\d+):(\d+)-(\d+)', path_range)
            prange = {"group": int(temp.group(1)), "start": int(temp.group(2)), "end": int(temp.group(3))}
            del temp
    except re.error as exc:
        ptee.write("Regular expression is not correct, please fix it! Here is the error stack:\n")
        ptee.write(traceback.format_exc())
        return 1

    ptee.write("== Regex Path Matcher started ==\n")
    ptee.write("Parameters:")
    ptee.write("- Input root: %s" % inputpath)
    ptee.write("- Input regex: %s" % regex_input)
    ptee.write("- Output root: %s" % outputpath)
    ptee.write("- Output regex: %s" % regex_output)
    ptee.write("\n")

    # == FILES WALKING AND MATCHING/SUBSTITUTION STEP
    files_list = []  # "to copy" files list, stores the list of input files and their corresponding output path (computed using regex)
    files_list_regroup = {}  # files list regrouped, if regroup = True
    ptee.write("Computing paths matching and simulation report, please wait (total time depends on files count - filesize has no influence). Press CTRL+C to abort\n")
    for dirpath, filename in tqdm(recwalk(inputpath, topdown=False), unit='files', leave=True, smoothing=0):
        # Get full absolute filepath and relative filepath from base dir
        filepath = os.path.join(dirpath, filename)
        relfilepath = path2unix(os.path.relpath(filepath, rootfolderpath)) # File relative path from the root (we truncate the rootfolderpath so that we can easily check the files later even if the absolute path is different)
        regin_match = regin.search(relfilepath)
        # Check if relative filepath matches the input regex
        if regin_match:  # Matched! We store it in the "to copy" files list
            # If range mode enabled, check if the numbers in the filepath are in the specified range, else we skip this file
            if path_range:
                curval = int(regin_match.group(prange['group']))
                if not (prange['start'] <= curval <= prange['end']):
                    continue
            # Compute the output filepath using output regex
            if outputpath:
                newfilepath = regin.sub(regex_output, relfilepath) if regex_output else relfilepath
                #fulloutpath = os.path.join(rootoutpath, newfilepath)
            else:
                newfilepath = None
                #fulloutpath = None
            # Store both paths into the "to copy" list
            files_list.append([relfilepath, newfilepath])
            if verbose or test_flag:  # Regex test mode or verbose: print the match
                ptee.write("\rMatch: %s %s %s\n" % (relfilepath, "-->" if newfilepath else "", newfilepath if newfilepath else ""))
                if test_flag:  # Regex test mode: break file walking after the first match
                    break
            # Store paths in a tree structure based on groups if regroup is enabled
            if regroup and regin_match.groups():
                curlevel = files_list_regroup  # current level in the tree
                parentlevel = curlevel  # parent level in the tree (necessary to modify the leaf, else there is no way to reference by pointer)
                lastg = 0  # last group key (to access the leaf)
                gdict = regin_match.groupdict()  # store the named groups, so we can pop as we consume it
                for g in regin_match.groups():
                    # For each group
                    if g is None:
                        # If group value is empty, just skip (ie, this is an optional group, this allow to specify multiple optional groups and build the tree accordingly)
                        continue
                    # Find if the current group value is in a named group, in this case we will also use the key name of the group followed by the value, and remove from dict (so that if there are multiple matching named groups with same value we don't lose them)
                    k, v, gdict = pop_first_namedgroup(gdict, g)
                    # If a named group is found, use the key followed by value as nodes
                    if k:
                        if not k in curlevel:
                            # Create node for group key/name
                            curlevel[k] = {}
                        if not g in curlevel[k]:
                            # Create subnode for group value
                            curlevel[k][g] = {}
                        # Memorize the parent level
                        parentlevel = curlevel[k]
                        lastg = g
                        # Memorize current level (step down one level for next iteration)
                        curlevel = curlevel[k][g]
                    # Else it is an unnamed group, use the value as the node name
                    else:
                        if not g in curlevel:
                            # Create node for group value
                            curlevel[g] = {}
                        # Memorize the parent level
                        parentlevel = curlevel
                        lastg = g
                        # Memorize current level (step down one level for next iteration)
                        curlevel = curlevel[g]
                # End of tree structure construction
                # Create the leaf if not done already, as a list
                if not parentlevel[lastg]:
                    parentlevel[lastg] = []
                # Append the value (so if there are multiple files matching the same structure, they will be appended in this list)
                parentlevel[lastg].append([relfilepath, newfilepath])
    ptee.write("End of simulation. %i files matched." % len(files_list))
    # Regex test mode: just quit after the first match
    if test_flag:
        if return_report:
            return files_list, None
        else:
            return 0

    # == SIMULATION REPORT STEP
    ptee.write("Preparing simulation report, please wait a few seconds...")

    # Initialize conflicts global flags
    conflict1_flag = False
    conflict2_flag = False

    # Show result in console using a Python implementation of MORE (because file list can be quite long)
    #more_display=More(num_lines=30)
    #"\n".join(map(str,files_list)) | more_display

    # Precompute conflict type 2 lookup table (= dict where each key is a output filepath, and the value the number of occurrences)
    outdict = {}
    for file_op in files_list:
        outdict[file_op[1]] = outdict.get(file_op[1], 0) + 1

    # Build and show simulation report in user's default text editor
    with open(reportpath, 'w') as reportfile:
        reportfile.write("== REGEX PATH MATCHER SIMULATION REPORT ==\n")
        reportfile.write("Total number of files matched: %i\n" % len(files_list))
        reportfile.write("Parameters:\n")
        reportfile.write("- Input root: %s\n" % inputpath)
        reportfile.write("- Input regex: %s\n" % regex_input)
        reportfile.write("- Output root: %s\n" % outputpath)
        reportfile.write("- Output regex: %s\n" % regex_output)
        reportfile.write("\n")
        reportfile.write("List of matched files:\n")
        for file_op in files_list:
            conflict1 = False
            conflict2 = False
            if outputpath:
                # Check if there was a conflict:
                # Type 1 - already existing output file (force overwrite?)
                fulloutpath = os.path.join(rootoutpath, file_op[1])
                if os.path.exists(fulloutpath):
                    conflict1 = True
                    conflict1_flag = True

                # Type 2 - two files will output with same name (bad regex)
                if outdict[file_op[1]] > 1:
                    conflict2 = True
                    conflict2_flag = True

            # Show relative or absolute paths?
            if show_fullpath:
                showinpath = os.path.join(rootfolderpath, file_op[0])
                showoutpath = os.path.join(rootoutpath, file_op[1]) if outputpath else None
            else:
                showinpath = file_op[0]
                showoutpath = file_op[1] if outputpath else None

            # Write into report file
            reportfile.write("* %s %s %s %s %s" % (showinpath, "-->" if (outputpath or delete_mode) else "", showoutpath if outputpath else "", "[ALREADY_EXIST]" if conflict1 else '', "[CONFLICT]" if conflict2 else ''))
            reportfile.write("\n")
    # Open the simulation report with the system's default text editor
    if not (yes_flag or return_report):  # if --yes is supplied, just skip question and apply!
        ptee.write("Opening simulation report with your default editor, a new window should open.")
        open_with_default_app(reportpath)

    # == COPY/MOVE STEP
    if files_list and ( delete_mode or ((copy_mode or symlink_mode or move_mode or movefast_mode) and outputpath) ):
        # -- USER NOTIFICATION AND VALIDATION
        # Notify user of conflicts
        ptee.write("\n")
        if conflict1_flag:
            ptee.write("Warning: conflict type 1 (files already exist) has been detected. Please use --force if you want to overwrite them, else they will be skipped.\n")
        if conflict2_flag:
            ptee.write("Warning: conflict type 2 (collision) has been detected. If you continue, several files will have the same name due to the specified output regex (thus, some will be lost). You should cancel and check your regular expression for output.\n")
        if not conflict1_flag and not conflict2_flag:
            ptee.write("No conflict detected. You are good to go!")

        # Ask user if we should apply
        if not (yes_flag or return_report):  # if --yes is supplied, just skip question and apply!
            applycopy = raw_input("Do you want to apply the result of the path reorganization simulation on %i files? [Y/N]: " % len(files_list))
            if applycopy.lower() != 'y':
                return 0

        # -- APPLY STEP
        ptee.write("Applying new path structure, please wait (total time depends on file sizes and matches count). Press CTRL+C to abort")
        for infilepath, outfilepath in tqdm(files_list, total=len(files_list), unit='files', leave=True):
            if verbose:
                ptee.write("%s --> %s" % (infilepath, outfilepath))
            # Copy the file! (User previously accepted to apply the simulation)
            fullinpath = os.path.join(rootfolderpath, infilepath)
            if outputpath:
                fulloutpath = os.path.join(rootoutpath, outfilepath)
                if movefast_mode:  # movefast: just move the file/directory tree
                    move_any(fullinpath, fulloutpath)
                else:  # else we first copy in any case, then delete old file if move_mode
                    copy_any(fullinpath, fulloutpath, only_missing=only_missing, symlink=True if symlink_mode else False)  # copy file
                    if move_mode:  # if move mode, then delete the old file. Copy/delete is safer than move because we can ensure that the file is fully copied (metadata/stats included) before deleting the old
                        remove_if_exist(fullinpath)
            if delete_mode:  # if delete mode, ensure that the original file is deleted!
                remove_if_exist(fullinpath)

    # == RETURN AND END OF MAIN
    ptee.write("Task done, quitting.")
    if return_report:  # return the matched files and their substitutions if available
        if regroup:
            return files_list_regroup, [conflict1_flag, conflict2_flag]
        else:
            return files_list, [conflict1_flag, conflict2_flag]
    else:  # Just return non error code
        return 0
Example #8
0
class mitm:
    """Decrypt and log the traffic of one intercepted session.

    Events are fed to :meth:`handle_event` as dicts whose payload fields are
    hex-encoded strings.  The instance accumulates the key material and the
    nonces seen so far (``sk``, ``pk``, ``k``, ``snonce``, ``rnonce``) and
    uses them to decrypt every later ``send``/``recv`` buffer.

    NOTE(review): this is Python 2 code (``unicode``, ``str.decode("hex")``).
    ``Box``, ``PrivateKey``, ``PublicKey``, ``blake2b`` and ``Tee`` are
    expected to be imported by the surrounding module.
    """

    BASE_DIR = os.path.dirname(os.path.abspath(__file__))

    # Objects handed to the constructor; ``script`` must offer post_message().
    session = None
    script = None
    # Hard-coded server public key (set in __init__).
    serverkey = None
    # Client public key, taken from the first 32 bytes of message 10101.
    pk = None
    # Private key delivered by the "keypair" event.
    sk = None
    # Box decoded from message 20104, used for every later message.
    k = None
    # Nonces advanced on every "send" (snonce) / "recv" (rnonce) message.
    snonce = None
    rnonce = None
    # Tee object created by the "socket" event.
    tee = None

    def __init__(self, session, script):
        """Store the collaborators and load the hard-coded server key."""
        self.session = session
        self.script = script
        self.serverkey = PublicKey(
            "47d1416f3cf982d2b510cab32ecc4f1a04971345446cb1af326f304f63da6264".
            decode("hex"))

    def handle_event(self, event):
        """Process one event dict, updating the session state.

        ``event`` is modified in place: ``messageid`` is parsed as a
        hexadecimal integer and every other string field except ``type`` and
        ``from`` is hex-decoded.  Supported ``type`` values are ``socket``,
        ``keypair``, ``send``, ``recv``, ``closing`` and ``close``.

        Raises:
            Exception: for an unknown event type, or when an encrypted
                message arrives before the key/nonce it depends on is known.
        """
        # Only existing keys are reassigned, so iterating the dict is safe.
        for key in event:
            if key in {"type", "from"}:
                continue
            elif key == "messageid":
                event[key] = int(event[key], 16)
            elif type(event[key]) is bool:
                continue
            elif type(event[key]) in {str, unicode}:
                event[key] = event[key].decode("hex")

        kind = event["type"]
        if kind == "socket":
            self.tee = Tee(
                os.path.join(self.BASE_DIR,
                             "session-{}.log".format(event["threadid"])))
            self.log("session started")
        elif kind == "keypair":
            self.sk = PrivateKey(event["sk"])
            self.dump({"sk": self.sk}, function="PrivateKey")
        elif kind == "send" or kind == "recv":
            if event["messageid"] in (10100, 20100):
                # These two message ids are dumped as-is, without decryption.
                event.update({"message": event["buffer"]})
                self.dump(event)
            else:
                self._decrypt_message(event)
        elif kind == "closing":
            self.log("session closed")
        elif kind == "close":
            # A "close" may arrive without a preceding "socket" event, in
            # which case there is no Tee to shut down.
            if self.tee is not None:
                self.tee.flush()
                self.tee.close()
        else:
            raise Exception("Invalid event type ({}).".format(event["type"]))

    def _decrypt_message(self, event):
        """Decrypt one encrypted ``send``/``recv`` event and dump the result.

        Message ids 10101 and 20104 are the handshake: they are decrypted
        with ``Box(sk, serverkey)`` and a blake2b-derived nonce, and they
        yield ``snonce`` (10101) or ``rnonce`` and ``k`` (20104).  Every
        other id is decrypted with ``k`` and the running nonce of its
        direction.

        Raises:
            Exception: when required key material or a nonce is missing.
            Any exception from ``decrypt`` is re-raised for handshake
            messages; for other messages it is only reported.
        """
        messageid = event["messageid"]
        handshake = messageid in {10101, 20104}

        # Prerequisite checks, in dependency order.
        if not self.serverkey:
            raise Exception("Missing server key ({}).".format(messageid))
        if not self.sk:
            raise Exception("Missing secret key ({}).".format(messageid))
        if messageid == 10101:
            # The client handshake starts with the 32-byte client public key.
            self.pk = PublicKey(event["buffer"][:32])
            self.dump({"pk": bytes(self.pk)}, function="PublicKey")
            event["buffer"] = event["buffer"][32:]
        if not self.pk:
            raise Exception("Missing public key ({}).".format(messageid))
        if not (messageid == 10101 or self.snonce):
            raise Exception("Missing client nonce ({}).".format(messageid))
        if not (handshake or self.rnonce):
            raise Exception("Missing server nonce ({}).".format(messageid))
        if not (handshake or self.k):
            raise Exception("Missing shared key ({}).".format(messageid))

        if handshake:
            k = Box(self.sk, self.serverkey)
            self.dump({"s": k}, function="Box")
            # nonce = blake2b([snonce ||] pk || serverkey), 24 bytes
            b2 = blake2b(digest_size=24)
            if messageid == 20104:
                b2.update(bytes(self.snonce))
            b2.update(bytes(self.pk))
            b2.update(bytes(self.serverkey))
            nonce = b2.digest()
            derivation = {
                "pk": self.pk,
                "serverkey": self.serverkey,
                "nonce": nonce
            }
            if messageid == 20104:
                derivation["snonce"] = self.snonce
            self.dump(derivation, function="blake2b")
        else:
            k = self.k
            # The nonce is advanced before decrypting, so it moves on even
            # when decryption of this message fails.
            if event["type"] == "send":
                self.snonce = self.increment_nonce(self.snonce)
                nonce = self.snonce
            elif event["type"] == "recv":
                self.rnonce = self.increment_nonce(self.rnonce)
                nonce = self.rnonce

        ciphertext = event["buffer"]
        event.update({"k": k, "nonce": nonce, "ciphertext": ciphertext})
        try:
            message = k.decrypt(ciphertext, nonce)
        except Exception:
            self.dump(event, error=True)
            self.log(
                "Warning: failed to decrypt {}".format(event["messageid"]),
                error=True)
            # Without a decrypted handshake the session cannot continue;
            # other messages are reported and skipped.
            if handshake:
                raise
        else:
            if messageid == 10101:
                self.snonce = message[24:48]
                self.dump({"snonce": self.snonce}, function="slice")
                message = message[48:]
            elif messageid == 20104:
                self.rnonce = message[:24]
                self.k = Box.decode(message[24:56])
                self.dump({"rnonce": self.rnonce, "k": self.k},
                          function="slice")
                message = message[56:]
            event.update({"message": message})
            self.dump(event)

    def increment_nonce(self, nonce):
        """Return ``nonce`` advanced by 2 as a little-endian counter.

        The result always has the same length as ``nonce``; a carry out of
        the last byte is discarded.  Byte-wise addition is used because a
        round-trip through ``hex()`` drops leading zero digits and therefore
        cannot keep the width fixed.
        """
        counter = bytearray(nonce)
        carry = 2
        for index in range(len(counter)):
            carry += counter[index]
            counter[index] = carry & 0xFF
            carry >>= 8
            if not carry:
                break
        return bytes(counter)

    def log(self, message, error=False):
        """Print ``message`` when ``error`` is set, else post it to the script."""
        if error:
            print(message)
        else:
            self.script.post_message({"type": "log", "message": message})

    def dump(self, event, function=None, error=False):
        """Log the known fields of ``event`` as an aligned, hex-encoded table.

        ``function`` is the heading of the table and defaults to
        ``event["type"]``.  With ``error`` set the heading is right-aligned
        and everything is routed through the error channel of :meth:`log`.
        Keys that are neither listed in ``ordered`` nor in ``skipped`` are
        reported in a trailing warning.
        """
        message = []
        if not function:
            function = event["type"]
        if error:
            function = function.rjust(31)
        message.append(function)
        message.append("--------------------".rjust(31))
        # Fixed display order of the fields.
        ordered = [
            "messageid", "snonce", "rnonce", "pk", "sk", "serverkey", "s", "k",
            "nonce", "message", "ciphertext"
        ]
        skipped = ["from", "type", "buffer"]
        for key in ordered:
            if key not in event:
                continue
            if type(event[key]) in {Box, PrivateKey, PublicKey}:
                value = bytes(event[key]).encode("hex")
            elif type(event[key]) in {dict, bool}:
                value = str(event[key])
            elif type(event[key]) in {str, unicode}:
                value = event[key].encode("hex")
            else:
                value = event[key]
            message.append("".join(["".rjust(15), key.ljust(20), str(value)]))
        message.append("")
        self.log("\n".join(message), error=error)
        extra = set(event.keys()) - set(ordered) - set(skipped)
        if extra:
            # Sorted so that the warning text is stable between runs.
            self.log(
                "Warning: Missed key(s) ({})".format(", ".join(sorted(extra))),
                error=error)
Example #9
0
 def handle_event(self, event):
     """Process one event dict, updating the session state.

     ``event`` is modified in place: ``messageid`` is parsed as a
     hexadecimal integer and every other string field except ``type`` and
     ``from`` is hex-decoded.  Supported ``type`` values are ``socket``,
     ``keypair``, ``send``, ``recv``, ``closing`` and ``close``.

     For ``send``/``recv``, message ids 10100 and 20100 are dumped as-is.
     Ids 10101 and 20104 are the handshake: they are decrypted with
     ``Box(sk, serverkey)`` and a blake2b-derived nonce, and they yield
     ``snonce`` (10101) or ``rnonce`` and ``k`` (20104).  Every other id is
     decrypted with ``k`` and the running nonce of its direction.

     Raises:
         Exception: for an unknown event type, or when an encrypted message
             arrives before the key/nonce it depends on is known.
         Any exception from ``decrypt`` is re-raised for handshake messages;
         for other messages it is only reported.
     """
     # Only existing keys are reassigned, so iterating the dict is safe.
     for key in event:
         if key in {"type", "from"}:
             continue
         elif key == "messageid":
             event[key] = int(event[key], 16)
         elif type(event[key]) is bool:
             continue
         elif type(event[key]) in {str, unicode}:
             event[key] = event[key].decode("hex")

     kind = event["type"]
     if kind == "socket":
         self.tee = Tee(
             os.path.join(self.BASE_DIR,
                          "session-{}.log".format(event["threadid"])))
         self.log("session started")
         return
     if kind == "keypair":
         self.sk = PrivateKey(event["sk"])
         self.dump({"sk": self.sk}, function="PrivateKey")
         return
     if kind == "closing":
         self.log("session closed")
         return
     if kind == "close":
         # A "close" may arrive without a preceding "socket" event, in which
         # case there is no Tee to shut down.
         if self.tee is not None:
             self.tee.flush()
             self.tee.close()
         return
     if kind != "send" and kind != "recv":
         raise Exception("Invalid event type ({}).".format(event["type"]))

     messageid = event["messageid"]
     if messageid in (10100, 20100):
         # These two message ids are dumped without decryption.
         event.update({"message": event["buffer"]})
         self.dump(event)
         return

     handshake = messageid in {10101, 20104}

     # Prerequisite checks, in dependency order.
     if not self.serverkey:
         raise Exception("Missing server key ({}).".format(messageid))
     if not self.sk:
         raise Exception("Missing secret key ({}).".format(messageid))
     if messageid == 10101:
         # The client handshake starts with the 32-byte client public key.
         self.pk = PublicKey(event["buffer"][:32])
         self.dump({"pk": bytes(self.pk)}, function="PublicKey")
         event["buffer"] = event["buffer"][32:]
     if not self.pk:
         raise Exception("Missing public key ({}).".format(messageid))
     if not (messageid == 10101 or self.snonce):
         raise Exception("Missing client nonce ({}).".format(messageid))
     if not (handshake or self.rnonce):
         raise Exception("Missing server nonce ({}).".format(messageid))
     if not (handshake or self.k):
         raise Exception("Missing shared key ({}).".format(messageid))

     if handshake:
         k = Box(self.sk, self.serverkey)
         self.dump({"s": k}, function="Box")
         # nonce = blake2b([snonce ||] pk || serverkey), 24 bytes
         b2 = blake2b(digest_size=24)
         if messageid == 20104:
             b2.update(bytes(self.snonce))
         b2.update(bytes(self.pk))
         b2.update(bytes(self.serverkey))
         nonce = b2.digest()
         derivation = {
             "pk": self.pk,
             "serverkey": self.serverkey,
             "nonce": nonce
         }
         if messageid == 20104:
             derivation["snonce"] = self.snonce
         self.dump(derivation, function="blake2b")
     else:
         k = self.k
         # The nonce is advanced before decrypting, so it moves on even when
         # decryption of this message fails.
         if kind == "send":
             self.snonce = self.increment_nonce(self.snonce)
             nonce = self.snonce
         elif kind == "recv":
             self.rnonce = self.increment_nonce(self.rnonce)
             nonce = self.rnonce

     ciphertext = event["buffer"]
     event.update({"k": k, "nonce": nonce, "ciphertext": ciphertext})
     try:
         message = k.decrypt(ciphertext, nonce)
     except Exception:
         self.dump(event, error=True)
         self.log(
             "Warning: failed to decrypt {}".format(event["messageid"]),
             error=True)
         # Without a decrypted handshake the session cannot continue; other
         # messages are reported and skipped.
         if handshake:
             raise
     else:
         if messageid == 10101:
             self.snonce = message[24:48]
             self.dump({"snonce": self.snonce}, function="slice")
             message = message[48:]
         elif messageid == 20104:
             self.rnonce = message[:24]
             self.k = Box.decode(message[24:56])
             self.dump({"rnonce": self.rnonce, "k": self.k},
                       function="slice")
             message = message[56:]
         event.update({"message": message})
         self.dump(event)
        exp_args = exp_tasks["experiment"]

        preproc_args = exp_tasks["preproc"]

        train_args = exp_tasks["train"]
        train_args.model_file = exp_args.model_file

        decode_args = exp_tasks["decode"]
        decode_args.trg_file = exp_args.hyp_file
        decode_args.model_file = None  # The model is passed to the decoder directly

        evaluate_args = exp_tasks["evaluate"]
        evaluate_args.hyp_file = exp_args.hyp_file
        evaluators = map(lambda s: s.lower(), exp_args.eval_metrics.split(","))

        output = Tee(exp_args.out_file, 3)
        err_output = Tee(exp_args.err_file, 3, error=True)

        # Do preprocessing
        print("> Preprocessing")
        xnmt_preproc.xnmt_preproc(preproc_args)

        # Do training
        for task_name in exp_tasks:
            if hasattr(exp_tasks[task_name], "random_search_report"):
                print("> instantiated random parameter search: %s" %
                      exp_tasks[task_name].random_search_report)

        print("> Training")
        xnmt_trainer = xnmt_train.XnmtTrainer(train_args)
        xnmt_trainer.decode_args = copy.copy(decode_args)
Example #11
0
# File name of this script (without its directory).
ScriptName = os.path.basename(__file__)
PkgScripts = '/pkgscripts-ng'

# Extend the module search path before the imports below.
# NOTE(review): ScriptDir is not defined in this excerpt; presumably the
# modules imported next live under these two directories -- confirm.
sys.path.append(ScriptDir + '/include')
sys.path.append(ScriptDir + '/include/python')
import BuildEnv
from chroot import Chroot
from parallel import doPlatformParallel, doParallel
from link_project import link_projects, link_scripts, LinkProjectError
from tee import Tee
import config_parser
from project_visitor import UpdateHook, ProjectVisitor, UpdateFailedError, ConflictError
from version_file import VersionFile

# Both standard streams are replaced by Tee objects that are given the same
# log file path (BaseDir is not defined in this excerpt).
# NOTE(review): presumably Tee copies everything written to the stream into
# the log file, and move=False stops the second Tee from rotating the file
# the first one just opened -- confirm against tee.py.
log_file = os.path.join(BaseDir, 'pkgcreate.log')
sys.stdout = Tee(sys.stdout, log_file)
sys.stderr = Tee(sys.stderr, log_file, move=False)

# NOTE(review): looks like the lowest SDK version this script accepts; it is
# a string, so check how callers compare it.
MinSDKVersion = "6.0"
# Starts out empty; nothing in this excerpt adds to it.
BasicProjects = set()


class PkgCreateError(RuntimeError):
    """Root of this script's error hierarchy; adds nothing to RuntimeError."""


class SignPackageError(PkgCreateError):
    """PkgCreateError subtype; presumably signals a package-signing failure."""


class CollectPackageError(PkgCreateError):