Beispiel #1
0
            id = Column(GUID(), primary_key=True)
            rse = Column(String)

    if "-n" in opts:
            nparts = int(opts["-n"])
    else:
            nparts = config.nparts(rse_name) or 1

    if nparts > 1:
            if out_prefix is None:
                    print("Output file path must be specified if partitioning is requested")
                    sys.exit(1)

    out_list = None
    if out_prefix is not None:
        out_list = PartitionedList.create(nparts, out_prefix, zout)

    subdir = config.dbdump_root(rse_name) or "/"
    if not subdir.endswith("/"):    subdir = subdir + "/"

    _, ignore_file_patterns = config.ignore_lists(rse_name)

    engine = create_engine(dbconfig.DBURL,  echo=verbose)
    Session = sessionmaker(bind=engine)
    session = Session()

    rse = session.query(RSE).filter(RSE.rse == rse_name).first()
    if rse is None:
            print ("RSE %s not found" % (rse_name,))
            sys.exit(1)
def main():
    """Filter, rewrite and re-partition path lists (command-line entry point).

    Reads the input list files named by the positional arguments, applies the
    optional per-RSE preprocessing rules taken from the configuration, and
    writes the surviving paths into a new partitioned list. The number of
    paths written (``out_lst.NWritten``) is printed at the end.

    Command-line options (parsed from ``sys.argv``):
        -o <prefix>   output prefix (required)
        -n <nparts>   number of output partitions; overrides the config value
        -c <config>   configuration file; requires ``-r``
        -r <rse>      RSE name used to look up parameters in the config
        -z            forwarded to ``PartitionedList.create`` as its third
                      argument (presumably "compress output" - confirm)
        -q            accepted by the parser but not used here

    Preprocessing steps applied to every path, in this order:
        1. skip paths that do not start with ``starts_with``
        2. skip paths that start with any prefix from the ignore list
        3. skip paths that do not match the ``filter`` regular expression
        4. strip ``remove_prefix`` (the path must start with it)
        5. prepend ``add_prefix``
        6. apply the ``rewrite`` substitution (the pattern must match)

    Exits with status 2 on usage errors and with status 1 when a path does
    not satisfy ``remove_prefix`` or the ``rewrite`` pattern.
    """
    opts, args = getopt.getopt(sys.argv[1:], "n:o:c:qr:z")
    opts = dict(opts)
    if not args or "-o" not in opts:
        print(Usage)
        sys.exit(2)

    out_prefix = opts["-o"]
    zout = "-z" in opts
    nparts = None
    rewrite_match = rewrite_out = filter_in = None
    remove_prefix = add_prefix = starts_with = None
    ignore_list = []

    if "-c" in opts:
        # The config is keyed by RSE name, so -c is meaningless without -r.
        # Report that as a usage error instead of failing with a KeyError.
        if "-r" not in opts:
            print("RSE name (-r <rse>) is required when -c <config> is used")
            print(Usage)
            sys.exit(2)
        rse = opts["-r"]
        config = Config(opts["-c"])
        preprocess = config.rse_param(rse, "preprocess")
        ignore_list = config.rse_param(rse, "ignore_list") or []
        if preprocess is not None:
            # An ignore list inside "preprocess" overrides the RSE-level one.
            ilist = preprocess.get("ignore_list")
            if ilist is not None:
                ignore_list = ilist
            filter_in = preprocess.get("filter")
            if filter_in is not None:
                filter_in = re.compile(filter_in)
            starts_with = preprocess.get("starts_with")
            remove_prefix = preprocess.get("remove_prefix")
            add_prefix = preprocess.get("add_prefix")
            rewrite = preprocess.get("rewrite", {})
            if rewrite:
                rewrite_match = re.compile(rewrite["match"])
                rewrite_out = rewrite["out"]
        nparts = config.nparts(rse)

    # -n takes precedence over the configured value. The None check has to
    # happen *before* int(): int(None) raises TypeError, which previously
    # made the explanatory message below unreachable.
    nparts = opts.get("-n", nparts)
    if nparts is None:
        print(
            "N parts must be specified either with -n or via the -c <config> and -r <rse>"
        )
        print(Usage)
        sys.exit(2)
    nparts = int(nparts)

    in_lst = PartitionedList.open(files=args)
    out_lst = PartitionedList.create(nparts, out_prefix, zout)

    # str.startswith accepts a tuple of prefixes; an empty tuple never matches.
    ignore_prefixes = tuple(ignore_list)

    try:
        for path in in_lst:
            if starts_with and not path.startswith(starts_with):
                continue
            if path.startswith(ignore_prefixes):
                continue
            if filter_in is not None and not filter_in.search(path):
                continue
            if remove_prefix is not None:
                if not path.startswith(remove_prefix):
                    sys.stderr.write(
                        f"Path {path} does not begin with prefix {remove_prefix}\n"
                    )
                    sys.exit(1)
                path = path[len(remove_prefix):]
            if add_prefix:
                path = add_prefix + path
            if rewrite_match is not None:
                if not rewrite_match.search(path):
                    sys.stderr.write(
                        f"Path rewrite pattern did not find a match in path {path}\n"
                    )
                    sys.exit(1)
                path = rewrite_match.sub(rewrite_out, path)
            out_lst.add(path)
    finally:
        # Close the output even when bailing out through sys.exit() so that
        # already written partitions are not left open.
        out_lst.close()

    print(out_lst.NWritten)
    zout = "-z" in opts
    
    if "-n" in opts:
        nparts = int(opts["-n"])
    else:
        nparts = config.nparts(rse)

    if nparts > 1:
        if not "-o" in opts:
            print ("Output prefix is required for partitioned output")
            print (Usage)
            sys.exit(2)

    output = opts.get("-o","out.list")

    out_list = PartitionedList.create(nparts, output, zout)

    dir_output = opts.get("-d")
    dir_list = PartitionedList.create(nparts, dir_output, zout) if dir_output else None

    server = config.scanner_server(rse)
    server_root = config.scanner_server_root(rse)
    include_sizes = config.scanner_include_sizes(rse) and not "-x" in opts
    purge_empty_dirs = config.scanner_param(rse, "purge_empty_dirs", default=False)
    if not server_root:
        print(f"Server root is not defined for {rse}. Should be defined as 'server_root'")
        sys.exit(2)

    my_stats = {
        "rse":rse,
        "scanner":{
Beispiel #4
0
def cmp3_parts(a_prefix, r_prefix, b_prefix):
    """Open the three partitioned lists by prefix and compare them.

    The lists are opened in the order ``a_prefix``, ``r_prefix``,
    ``b_prefix`` and handed, in that same order, to ``cmp3_lists``, whose
    result is returned unchanged.
    """
    opened = [
        PartitionedList.open(prefix)
        for prefix in (a_prefix, r_prefix, b_prefix)
    ]
    return cmp3_lists(*opened)
Beispiel #5
0
def main():
    """Run the three-way list comparison (command-line entry point).

    Positional arguments, in order:
        b_prefix r_prefix a_prefix out_dark out_missing

    Options (parsed from ``sys.argv``):
        -z          write gzip-compressed output; ``.gz`` is appended to the
                    output file names when they do not already end with it
        -s <file>   statistics file; when given, progress is recorded in it
        -S <key>    key under which statistics are stored (default "cmp3")

    Every ``(t, path)`` pair produced by ``cmp3_generator`` is written to the
    "dark" file when ``t == 'd'`` and to the "missing" file otherwise. The
    counts and the elapsed time are printed, and the statistics entry is
    updated from "started" to "done" on success.

    Exits with status 2 when the number of positional arguments is not 5.
    """
    import getopt

    t0 = time.time()

    opts, args = getopt.getopt(sys.argv[1:], "s:S:z")
    opts = dict(opts)

    # Exactly five positional arguments are unpacked below; anything else is
    # a usage error (extra arguments used to crash with ValueError).
    if len(args) != 5:
        print(Usage)
        sys.exit(2)
    compress = "-z" in opts
    stats_file = opts.get("-s")
    stats_key = opts.get("-S", "cmp3")
    stats = Stats(stats_file) if stats_file else None

    b_prefix, r_prefix, a_prefix, out_dark, out_missing = args

    a_list = PartitionedList.open(a_prefix)
    r_list = PartitionedList.open(r_prefix)
    b_list = PartitionedList.open(b_prefix)

    my_stats = {
        "version": Version,
        "elapsed": None,
        "start_time": t0,
        "end_time": None,
        "missing": None,
        "dark": None,
        "missing_list_file": None,
        "dark_list_file": None,
        "b_prefix": b_prefix,
        "a_prefix": a_prefix,
        "r_prefix": r_prefix,
        "a_files": a_list.FileNames,
        "b_files": b_list.FileNames,
        "r_files": r_list.FileNames,
        "a_nfiles": a_list.NParts,
        "b_nfiles": b_list.NParts,
        "r_nfiles": r_list.NParts,
        "status": "started"
    }

    if stats is not None:
        stats[stats_key] = my_stats

    if compress:
        if not out_dark.endswith(".gz"):
            out_dark += ".gz"
        if not out_missing.endswith(".gz"):
            out_missing += ".gz"
        opener = gzip.open
    else:
        opener = open

    nm = nd = 0
    # The context managers guarantee both files are closed even if opening
    # the second file or the comparison itself raises.
    with opener(out_dark, "wt") as fd, opener(out_missing, "wt") as fm:
        for t, path in cmp3_generator(a_list, r_list, b_list):
            if t == 'd':
                fd.write(path)
                nd += 1
            else:
                fm.write(path)
                nm += 1

    print("Found %d dark and %d missing replicas" % (nd, nm))
    t1 = time.time()

    my_stats.update({
        "elapsed": t1 - t0,
        "end_time": t1,
        "missing": nm,
        "dark": nd,
        "status": "done",
        "missing_list_file": out_missing,
        "dark_list_file": out_dark
    })

    if stats is not None:
        stats[stats_key] = my_stats

    m, s = divmod(int(t1 - t0), 60)
    print("Elapsed time: %dm%02ds" % (m, s))
    Session = sessionmaker(bind=engine)
    session = Session()

    rse = session.query(RSE).filter(RSE.rse == rse_name).first()
    if rse is None:
        print("RSE %s not found" % (rse_name, ))
        sys.exit(1)

    rse_id = rse.id

    #print ("rse_id:", type(rse_id), rse_id)

    batch = 100000

    outputs = {
        states: PartitionedList.create(nparts, prefix, zout)
        for states, prefix in filters.items()
    }

    all_replicas = '*' in all_states

    replicas = session.query(Replica).filter(
        Replica.rse_id == rse_id).yield_per(batch)

    if all_replicas:
        sys.stderr.write("including all replias\n")
    else:
        print("including replicas in states:",
              list(all_states),
              file=sys.stderr)
        replicas = replicas.filter(Replica.state.in_(list(all_states)))
Beispiel #7
0
def main():
    """Combine two path lists with a set operation (command-line entry point).

    Positional arguments, in order:
        op a_spec b_spec out_spec

    ``op`` is one of:
        and     entries of A that are also in B
        minus   entries of A that are not in B
        xor     entries that are in exactly one of A and B
        or      entries of A followed by the entries found only in B

    Options (parsed from ``sys.argv``):
        -f          treat the three specs as single files instead of
                    partitioned-list prefixes
        -s <file>   statistics file; when given, progress is recorded in it
        -S <key>    key under which statistics are stored (default "join")
        -z          parsed, but not used by the visible code (see note below)

    The lists are processed partition by partition; in prefix mode both
    inputs must have the same number of partitions.

    Exits with status 2 on usage errors (wrong argument count or unknown
    operation) and with status 1 on a partition count mismatch.
    """
    import getopt

    t0 = time.time()

    opts, args = getopt.getopt(sys.argv[1:], "s:S:zf")
    opts = dict(opts)

    # Exactly four positional arguments are unpacked below; anything else is
    # a usage error (extra arguments used to crash with ValueError).
    if len(args) != 4:
        print(Usage)
        sys.exit(2)

    stats_file = opts.get("-s")
    stats_key = opts.get("-S", "join")
    # NOTE(review): "-z" is parsed but never forwarded to the output list;
    # other callers in this code base pass such a flag as the third argument
    # of PartitionedList.create - confirm whether that is intended here too.
    compress = "-z" in opts
    single_file = "-f" in opts

    my_stats = stats = None

    op, a_spec, b_spec, out_spec = args

    # Reject unknown operations up front instead of silently producing an
    # empty result.
    if op not in ("and", "minus", "xor", "or"):
        print("Unknown operation: %s" % (op,))
        print(Usage)
        sys.exit(2)

    if single_file:
        a_list = PartitionedList.open(files=[a_spec])
        b_list = PartitionedList.open(files=[b_spec])
        out_list = PartitionedList.create_file(out_spec)
    else:
        a_list = PartitionedList.open(prefix=a_spec)
        b_list = PartitionedList.open(prefix=b_spec)
        if a_list.NParts != b_list.NParts:
            print("Inconsistent number of parts: %s:%d: %s:%d" %
                  (a_spec, a_list.NParts, b_spec, b_list.NParts))
            sys.exit(1)
        out_list = PartitionedList.create(a_list.NParts, out_spec)

    if stats_file is not None:
        stats = Stats(stats_file)
        my_stats = {
            "version": Version,
            "elapsed": None,
            "start_time": t0,
            "end_time": None,
            "a_list_files": 0,
            "b_list_files": 0,
            "join_list_files": 0,
            "operation": op,
            "b_prefix": b_spec,
            "a_prefix": a_spec,
            "out_prefix": out_spec,
            "a_files": a_list.FileNames,
            "b_files": b_list.FileNames,
            "out_files": out_list.FileNames,
            "nparts": a_list.NParts,
            "status": "started"
        }
        stats[stats_key] = my_stats

    n_a_files = 0
    n_b_files = 0
    n_out_files = 0

    for pa, pb in zip(a_list.parts(), b_list.parts()):
        # Only the B partition is held in memory; A is streamed.
        b_set = set(pb)
        n_b_files += len(b_set)
        for f in pa:
            n_a_files += 1
            if op == "and":
                if f in b_set:
                    out_list.add(f)
                    n_out_files += 1
            elif op == "minus":
                if f not in b_set:
                    out_list.add(f)
                    n_out_files += 1
            elif op == "xor":
                if f in b_set:
                    # Present on both sides: drop it from the B leftovers.
                    b_set.remove(f)
                else:
                    out_list.add(f)
                    n_out_files += 1
            elif op == "or":
                if f in b_set:
                    # Avoid emitting the entry again from the B leftovers.
                    b_set.remove(f)
                out_list.add(f)
                n_out_files += 1
        if op in ("or", "xor"):
            # Whatever is left in b_set was not seen in A.
            for f in b_set:
                out_list.add(f)
                n_out_files += 1

    # NOTE(review): out_list is not closed anywhere in the visible part of
    # this function - confirm it is closed/flushed elsewhere.

    t1 = time.time()

    if stats is not None:
        my_stats.update({
            "elapsed": t1 - t0,
            "end_time": t1,
            "a_list_files": n_a_files,
            "b_list_files": n_b_files,
            # Was the undefined name `join_list_files`, which raised
            # NameError whenever a statistics file was requested.
            "join_list_files": n_out_files,
            "status": "done"
        })
        stats[stats_key] = my_stats