Ejemplo n.º 1
0
    def _decode(self, data):
        """ Inverse of _encode: convert database strings back into native types.

        Args:
            data: The string to decode

        Returns:
            Native data type
        """

        # Non-strings are stored natively; hand them back untouched.
        if not isinstance(data, basestring):
            return data

        decoded = data
        dash_parts = data.split('-')

        if self._get_from_disk and data.startswith('file://'):
            # Payload was written to an external data file; load it from disk.
            try:
                decoded = self.get_file(data[len('file://'):])
            except Exception as e:
                log.error("{} reading data file linked from SQL: {}".format(
                    e.__class__.__name__, e))

        elif data.startswith('base64://'):
            try:
                decoded = bytearray(base64.b64decode(data[len('base64://'):]))
            except Exception as e:
                log.error('{} parsing base 64 data from SQL: {}'.format(
                    e.__class__.__name__, e))

        # Hex is handled differently for backwards compatibility: instead of
        # a marker, any string shaped like AA-BB-XX... is assumed to be an
        # old-style binary string.
        elif (_ALLOW_OLDSTYLE_HEX and len(dash_parts) > 1
              and all(len(chunk) == 2 for chunk in dash_parts)):
            try:
                decoded = bytearray(base64.b16decode(''.join(dash_parts)))
            except Exception as e:
                log.error('{} parsing hex data from SQL: {}'.format(
                    e.__class__.__name__, e))

        # New-style hex encoding uses the hex:// keyword.
        # NOTE: the '-' separators in the encoded body are optional; they are
        # stripped before decoding so it works either way.
        elif data.startswith('hex://'):
            try:
                hexbody = ''.join(data[len('hex://'):].split('-'))
                decoded = bytearray(base64.b16decode(hexbody))
            except Exception as e:
                log.error('{} parsing hex data from SQL: {}'.format(
                    e.__class__.__name__, e))

        elif data.startswith('ts-ephem://'):
            decoded = conv_time(data[len('ts-ephem://'):], to='ephem')
        elif data.startswith('ts-pandas://'):
            decoded = conv_time(data[len('ts-pandas://'):], to='pandas')
        elif data.startswith('ts-dtime://'):
            decoded = conv_time(data[len('ts-dtime://'):], to='datetime')

        return decoded
Ejemplo n.º 2
0
    def epilogue(self):
        """Print the final fprof walk summary on rank 0 and return total size.

        Returns:
            total_filesize: aggregate byte count of all files walked.
        """
        (total_dirs, total_files, total_filesize, total_symlinks,
         total_skipped, maxfiles) = self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            print("\nFprof epilogue:\n")
            num_fmt = "\t{:<25}{:<20,}"    # numeric columns
            str_fmt = "\t{:<25}{:<20}"     # string columns

            print(num_fmt.format("Directory count:", total_dirs))
            print(num_fmt.format("Sym Links count:", total_symlinks))
            print(num_fmt.format("File count:", total_files))
            print(num_fmt.format("Skipped count:", total_skipped))
            print(str_fmt.format("Total file size:", bytes_fmt(total_filesize)))
            if total_files != 0:
                avg_size = total_filesize / float(total_files)
                print(str_fmt.format("Avg file size:", bytes_fmt(avg_size)))
            print(num_fmt.format("Max files within dir:", maxfiles))
            walk_secs = self.time_ended - self.time_started
            items_seen = total_files + total_dirs + total_symlinks + total_skipped
            scan_rate = int(items_seen / walk_secs)
            print(str_fmt.format("Tree walk time:", utils.conv_time(walk_secs)))
            print(str_fmt.format("Scanning rate:", str(scan_rate) + "/s"))
            print(str_fmt.format("Fprof loads:", taskloads))
            print("")

        return total_filesize
Ejemplo n.º 3
0
def tfilterstr2query(filters):
    """
    Convert a timestamp filter string into a well formatted WHERE query.

    Each filter is an optional comparison operator ('>', '<', '>=', '<=';
    default '=') followed by a timestamp that conv_time can parse.

    Args:
        filters: Iterable of filter strings, e.g. ['>=2020-01-01', '<2021'].

    Returns:
        str: Fragment like "TSTAMP>2458849.5 AND TSTAMP<2459215.5 ", or
        None when any timestamp fails to parse.
    """

    query = []
    for f in filters:
        # startswith avoids the original f[1] IndexError on 1-char filters.
        if f.startswith('>=') or f.startswith('<='):
            op = f[:2]
            f1 = f[2:]
        elif f.startswith('>') or f.startswith('<'):
            op = f[0]
            f1 = f[1:]
        else:
            op = '='
            f1 = f

        try:
            tstamp = conv_time(f1, to='julian')
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # still propagate; unparsable timestamps yield None as before.
            return None

        query += [op + str(tstamp)]

    return (TSTAMP_LBL + (' AND {}'.format(TSTAMP_LBL)).join(query) + ' ')
Ejemplo n.º 4
0
    def epilogue(self):
        """Print the FWALK summary on rank 0 after the tree walk completes.

        Tallies totals across ranks, records the end time, and (rank 0 only)
        prints counters, sizes, timing and storage configuration.
        """
        self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            print("\nFWALK Epilogue:\n")
            print("\t{:<20}{:<20}".format("Directory count:", T.total_dirs))
            print("\t{:<20}{:<20}".format("Sym Links count:",
                                          T.total_symlinks))
            print("\t{:<20}{:<20}".format("File count:", T.total_files))
            print("\t{:<20}{:<20}".format("Skipped count:", T.total_skipped))
            print("\t{:<20}{:<20}".format("Total file size:",
                                          bytes_fmt(T.total_filesize)))
            if T.total_files != 0:
                print("\t{:<20}{:<20}".format(
                    "Avg file size:",
                    bytes_fmt(T.total_filesize / float(T.total_files))))
            # Fixed label typo: was "Tree talk time:"; sibling epilogues
            # print "Tree walk time:".
            print("\t{:<20}{:<20}".format(
                "Tree walk time:",
                utils.conv_time(self.time_ended - self.time_started)))
            print("\t{:<20}{:<20}".format("Use store flist:",
                                          "%s" % self.use_store))
            print("\t{:<20}{:<20}".format("Use store workq:",
                                          "%s" % self.circle.use_store))
            print("\tFWALK Loads: %s" % taskloads)
            print("")
Ejemplo n.º 5
0
    def epilogue(self):
        """Print the fprof scan summary on rank 0 and mirror it to syslog.

        Returns:
            Tally.total_filesize: aggregate byte count of all files scanned.
        """
        self.total_tally()
        self.time_ended = MPI.Wtime()

        # Only the root rank reports; other ranks just record the end time.
        if self.circle.rank == 0:
            print("\nFprof epilogue:\n")
            fmt_msg1 = "\t{:<25}{:<20,}"    # numeric
            fmt_msg2 = "\t{:<25}{:<20}"     # string

            print(fmt_msg1.format("Directory count:", Tally.total_dirs))
            print(fmt_msg1.format("Sym links count:", Tally.total_symlinks))
            print(fmt_msg1.format("Hard linked files:", Tally.total_nlinked_files))
            print(fmt_msg1.format("File count:", Tally.total_files))
            # Device-file stats only appear when device profiling was requested.
            if args.profdev:
                print(fmt_msg1.format("Dev file count:", Tally.devfile_cnt))
                print(fmt_msg2.format("Dev file size:", bytes_fmt(Tally.devfile_sz)))
            print(fmt_msg1.format("Skipped count:", Tally.total_skipped))
            print(fmt_msg2.format("Total file size:", bytes_fmt(Tally.total_filesize)))
            if Tally.total_files != 0:
                print(fmt_msg2.format("Avg file size:",
                                      bytes_fmt(Tally.total_filesize/float(Tally.total_files))))
            print(fmt_msg1.format("Max files within dir:", Tally.max_files))
            # NOTE(review): assumes elapsed_time > 0; an instantaneous walk
            # would divide by zero — confirm MPI.Wtime resolution suffices.
            elapsed_time = self.time_ended - self.time_started
            processing_rate = int((Tally.total_files + Tally.total_dirs + Tally.total_symlinks + Tally.total_skipped) / elapsed_time)
            print(fmt_msg2.format("Tree walk time:", utils.conv_time(elapsed_time)))
            print(fmt_msg2.format("Scanning rate:", str(processing_rate) + "/s"))
            print(fmt_msg2.format("Fprof loads:", taskloads))
            print("")

            # Push headline metrics to syslog for external monitoring.
            sendto_syslog("fprof.rootpath", "%s" % ",".join(G.src))
            sendto_syslog("fprof.version", "%s" % __version__)

            sendto_syslog("fprof.dir_count", Tally.total_dirs)
            sendto_syslog("fprof.sym_count", Tally.total_symlinks)
            sendto_syslog("fprof.file_count", Tally.total_files)
            sendto_syslog("fprof.total_file_size", bytes_fmt(Tally.total_filesize))
            if Tally.total_files > 0:
                sendto_syslog("fprof.avg_file_size", bytes_fmt(Tally.total_filesize/float(Tally.total_files)))
            sendto_syslog("fprof.walktime", utils.conv_time(elapsed_time))
            sendto_syslog("fprof.scan_rate", processing_rate)

        return Tally.total_filesize
Ejemplo n.º 6
0
 def epilogue(self):
     """Gather per-rank loads and print the FCP transfer summary on rank 0."""
     global taskloads
     self.wtime_ended = MPI.Wtime()
     # Collect every rank's processed-item count onto rank 0.
     taskloads = self.circle.comm.gather(self.reduce_items)
     if self.circle.rank != 0:
         return
     if self.totalsize == 0:
         print("\nZero filesize detected, done.\n")
         return
     tlapse = self.wtime_ended - self.wtime_started
     rate = float(self.totalsize) / tlapse
     row = "\t{:<20}{:<20}"
     print("\nFCP Epilogue:\n")
     print(row.format("Ending at:", utils.current_time()))
     print(row.format("Completed in:", utils.conv_time(tlapse)))
     print(row.format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
     print(row.format("FCP Loads:", "%s" % taskloads))
Ejemplo n.º 7
0
 def epilogue(self):
     """Gather per-rank item loads and print the FCP transfer summary.

     Rank 0 prints timing, transfer rate and store configuration; the other
     ranks only contribute their reduce_items to the gather.
     """
     global taskloads
     self.wtime_ended = MPI.Wtime()
     # Collect each rank's processed-item count onto rank 0.
     taskloads = self.circle.comm.gather(self.reduce_items)
     if self.circle.rank == 0:
         if self.totalsize == 0:
             print("\nZero filesize detected, done.\n")
             return
         tlapse = self.wtime_ended - self.wtime_started
         # NOTE(review): assumes tlapse > 0 — an instantaneous run would
         # divide by zero; confirm MPI.Wtime resolution suffices.
         rate = float(self.totalsize) / tlapse
         print("\nFCP Epilogue:\n")
         print("\t{:<20}{:<20}".format("Ending at:", utils.current_time()))
         print("\t{:<20}{:<20}".format("Completed in:", utils.conv_time(tlapse)))
         print("\t{:<20}{:<20}".format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
         print("\t{:<20}{:<20}".format("Use store chunksums:", "%s" % self.use_store))
         print("\t{:<20}{:<20}".format("Use store workq:", "%s" % self.circle.use_store))
         print("\t{:<20}{:<20}".format("FCP Loads:", "%s" % taskloads))
Ejemplo n.º 8
0
    def epilogue(self):
        """Print the FWALK summary on rank 0 and return the total file size.

        Returns:
            total_filesize: aggregate byte count of all files found.
        """
        total_dirs, total_files, total_filesize, total_symlinks, total_skipped = self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            print("\nFWALK Epilogue:\n")
            print("\t{:<20}{:<20}".format("Directory count:", total_dirs))
            print("\t{:<20}{:<20}".format("Sym Links count:", total_symlinks))
            print("\t{:<20}{:<20}".format("File count:", total_files))
            print("\t{:<20}{:<20}".format("Skipped count:", total_skipped))
            print("\t{:<20}{:<20}".format("Total file size:", bytes_fmt(total_filesize)))
            if total_files != 0:
                print("\t{:<20}{:<20}".format("Avg file size:", bytes_fmt(total_filesize/float(total_files))))
            # Fixed label typo: was "Tree talk time:"; sibling epilogues
            # print "Tree walk time:".
            print("\t{:<20}{:<20}".format("Tree walk time:", utils.conv_time(self.time_ended - self.time_started)))
            print("\tFWALK Loads: %s" % taskloads)
            print("")

        return total_filesize
Ejemplo n.º 9
0
def gen_signature(bfsign, totalsize):
    """ Generate a signature for dataset, it assumes the checksum
       option is set and done

    Args:
        bfsign: Checksum container handed to aggregate_checksums.
        totalsize: Total byte count of the dataset, written to the report.
    """
    if comm.rank == 0:
        print("\nAggregating dataset signature ...\n")
    tbegin = MPI.Wtime()
    # All ranks participate in the aggregation; only rank 0 reports/writes.
    sig = aggregate_checksums(bfsign)
    tend = MPI.Wtime()
    if comm.rank == 0:
        #print("\t{:<20}{:<20}".format("Aggregated chunks:", size))
        print("\t{:<20}{:<20}".format("Running time:", utils.conv_time(tend - tbegin)))
        print("\t{:<20}{:<20}".format("SHA1 Signature:", sig))
        # Write the signature report to the path given on the command line.
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sig)
            f.write("chunksize: %s\n" % fcp.chunksize)
            f.write("fcp version: %s\n" % __version__)
            f.write("src: %s\n" % fcp.src)
            f.write("destination: %s\n" % fcp.dest)
            f.write("date: %s\n" % utils.current_time())
            # NOTE(review): "totoalsize" is a typo in the output key; kept
            # as-is since downstream parsers may depend on it — confirm.
            f.write("totoalsize: %s\n" % utils.bytes_fmt(totalsize))
Ejemplo n.º 10
0
            def _pthr_record():
                """Worker thread body: repeatedly sample spectra until N are
                taken or the `abort` event is set.

                Returns:
                    (fvec, dvec, spec): frequency vector, datetime vector and
                    a 2-D numpy array of recorded spectra.
                """
                # Pre-size per-slot results; ok_idx marks which slots were
                # actually filled with fresh data.
                spec = [None]*N
                ok_idx = [False] * N
                tvec   = [0.0] * N
                fvec = []
                abort.clear()

                t0 = time.time()

                last_spec = None
                for n in range(N):
                    try:

                        t0 = time.time()
                        tvec[n] = t0


                        # Sleep out the remainder of the sampling period dt.
                        # NOTE(review): dt1 can be negative if the previous
                        # iteration overran dt; time.sleep would then raise
                        # and the outer except logs and continues — confirm
                        # this is the intended behavior.
                        dt1 = (dt - (time.time() - t0))
                        if dt1 > 2.0:
                            # Long waits go through wait_loop so the abort
                            # event can interrupt them promptly.
                            if abort in wait_loop([abort], timeout=dt1):
                                break
                        else:
                            time.sleep(dt1)
                            if abort.is_set():
                                break


                        try:
                            ret = self.get_spectrum()
                        except:
                            # Transient read failure: skip this sample slot.
                            continue


                        if len(ret) != 2:
                            raise Error("get_spectrum did not return valid freq, spec")

                        # Decimate the frequency axis and spectrum by fdec.
                        fv, sp = ret[0][::fdec], ret[1][::fdec]

                        # Check if there is something new, if not skip
                        if last_spec is not None and all(sp[i] == last_spec[i] for i in range(len(sp))):
                            continue

                        last_spec = sp[:]

                        fvec, spec[n] = fv, sp
                        ok_idx[n] = True



                    except Exception as e:
                        log.error("Error {} while recording spectrum. Ignoring and continuing: {}".format(e.__class__.__name__, e))

                # Replace never-filled slots with zero rows of matching width.
                spec = [[0.0]*len(fvec) if x is None else list(x) for x in spec]

                lens = [len(s) for s in spec]
                if not all([l == lens[0] for l in lens]):
                    raise Error("Unexpected error recording spectra. Not all spectra are same length")


                if len(fvec) == 0:
                    raise Error("No data recorded")

                # Trim data that wasnt filled (in case abort wwas called)
                # NOTE(review): relies on the loop variable n after the loop,
                # so N must be >= 1 or this raises NameError — confirm.
                spec = spec[:(n+1)]
                tvec = tvec[:(n+1)]
                ok_idx = ok_idx[:(n+1)]

                spec = np.array(spec)
                # Get rid of bad data unless zeroes are requested
                if not add_zeroes:
                    spec = spec[ok_idx, :]
                    tvec = [tvec[k] for k,ok in enumerate(ok_idx) if ok]

                dvec = [conv_time(t, to="datetime", float_t="unix") for t in tvec]
                return fvec, dvec, spec
Ejemplo n.º 11
0
def main():
    """FCP driver: parse/broadcast args, run the parallel copy, optionally
    verify checksums and emit a dataset signature, then fix permissions and
    clean up the distributed work queue.
    """
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    # Propagate parsed options into the global configuration object G.
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()

    circle = Circle()
    circle.dbname = dbname

    if args.rid:
        circle.resume = True
        args.signature = False # when recovery, no signature

    # Without an explicit checkpoint id, derive one from rank 0's timestamp
    # and broadcast it so all ranks agree.
    if not args.cpid:
        ts = utils.timestamp()
        args.cpid = circle.comm.bcast(ts)

    # Rank 0 prints the run banner with the effective parameters.
    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
            "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
            "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
            "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
            "Checkpoint ID:", "%s" % args.cpid))

        #
        if args.verbosity > 0:
            print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))


    fcp_start()

    # Optional pause between copy and verification phases.
    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        circle = Circle()
        pcheck = PVerify(circle, fcp, G.totalsize)
        circle.begin(pcheck)
        tally = pcheck.fail_tally()
        # Broadcast the failure count so every rank takes the same branch.
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Result:", "FAILED"))

        comm.Barrier()

        # Signature is only meaningful when verification passed.
        if args.signature and tally == 0:
            gen_signature(fcp, G.totalsize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()

    if fcp:
        fcp.epilogue()
        fcp.cleanup()

    # if circle:
    #     circle.finalize(cleanup=True)
    # TODO: a close file error can happen when circle.finalize()
    #
    # Drop the on-disk work queue if one was used.
    if isinstance(circle.workq, DbStore):
        circle.workq.cleanup()
Ejemplo n.º 12
0
    def _encode(self, data):
        """
        Encode datatypes appropriately for storing in database
        Args:
            data: The data to encode

        Returns:
            The data in a format suitable for storing in database
        """

        # Python 2 type sets (basestring/unicode): these are encoded with a
        # scheme marker; plain numerics and None are stored as-is.
        types_to_encode = [
            basestring, bytearray, eDate, pd.Timestamp, datetime
        ]
        types_to_skip = [int, float, type(None)]

        # Only worry about encoding strings, bytearrays or datetime objects, for anything else. Try to just make it
        # a dumb string using the __str__ method. Note that this is a non-nominal situation
        if not any([isinstance(data, t) for t in types_to_encode]):
            if any([isinstance(data, t) for t in types_to_skip]):
                # These types are ignored
                return data

            else:
                # Other strange types will probably not be possible to save in database, so try to conver dumbly to a
                # string.

                log.debug(
                    "_encode: No encoding handler for type {}, trying to convert dumbly to string"
                    .format(type(data)))
                try:
                    return str(data)
                except:
                    log.error(
                        "_encode: Failed to convert type {} to string, database entry will be replaced with 'ERROR'"
                        .format(type(data)))
                    return "ERROR"

        # Handle normal string data
        if isinstance(data, basestring):

            # Make sure strings are propely utf-8 encoded
            if isinstance(data, unicode):
                data = data.encode('utf-8')

            # Large text payloads are spilled to disk and replaced by a
            # file:// reference in the database.
            if self._disk_threshold_text is not None and len(
                    data) > self._disk_threshold_text:

                try:
                    #
                    # 1.1 Data meets condition to be be saved to disk. Try to do so.
                    #
                    # Unique filename: UTC timestamp + 8 random alphanumerics.
                    fname = datetime.strftime(datetime.utcnow(),
                                              '%Y%m%d%H%M%S')
                    fname += '_' + ''.join([
                        random.choice(string.ascii_letters + string.digits)
                        for i in range(8)
                    ])
                    fname += '.txt'
                    full_fname = '{}/{}'.format(self._disk_path, fname)

                    with open(full_fname, 'w') as fp:
                        fp.write(data)

                    # NOTE(review): this debug message interpolates
                    # self._disk_threshold but the comparison above used
                    # self._disk_threshold_text — confirm which was intended.
                    log.debug(
                        "data is a text field but len(data) > {} and has so it has been stored in filesystem"
                        .format(self._disk_threshold))
                    return 'file://' + fname

                except Exception as e:
                    #
                    # 1.2 Data failed to save to disk, return it so that at least it is saved in db
                    #
                    log.error(
                        "{}: len(data) > {} but failed too save to filesystem: {}"
                        .format(e.__class__.__name__, self._disk_threshold, e))
                    return data

            else:
                #
                # 1.2 Data is small enough to return without saving to disk
                #
                return data

        # binary data
        elif isinstance(data, bytearray):
            # Large binary payloads are likewise spilled to disk.
            if self._disk_threshold is not None and len(
                    data) > self._disk_threshold:

                try:
                    fname = datetime.strftime(datetime.utcnow(),
                                              '%Y%m%d%H%M%S')
                    fname += '_' + ''.join([
                        random.choice(string.ascii_letters + string.digits)
                        for i in range(8)
                    ])
                    fname += '.bin'
                    full_fname = '{}/{}'.format(self._disk_path, fname)
                    with open(full_fname, 'wb') as fp:
                        fp.write(data)

                    log.debug(
                        "data is binary and len(data) > {} so it has been stored in filesystem"
                        .format(self._disk_threshold))

                    #
                    # 2.1 Data is binary and met conditions for saving to disk
                    #
                    return 'file://{}'.format(fname)
                except Exception as e:

                    #
                    # 2.2 Data is binary but failed to save to disk. Continue with data conversion so at least it is saved in database
                    #
                    log.error(
                        "{}: len(data) > {} but failed to save to filesystem: {}"
                        .format(e.__class__.__name__, self._disk_threshold, e))

            if self._binary_fmt == 'b64':

                #
                # 2.3 a. Return base64 encoded data
                #
                return 'base64://' + base64.b64encode(data)
            elif self._binary_fmt == 'hex':
                #
                # 2.3 b. Return hex encoded data
                #
                # Dash-separated byte pairs (AA-BB-...), matching what
                # _decode's hex:// branch accepts.
                b16data = base64.b16encode(data)
                return 'hex://' + '-'.join(
                    [b16data[n:n + 2] for n in range(len(b16data))[::2]])
            else:
                raise Error("Unexpected binary format {}".format(
                    self._binary_fmt))

        # Timestamp types carry a marker identifying the original type so
        # _decode can reconstruct it.
        elif isinstance(data, eDate):
            return 'ts-ephem://' + conv_time(data, to='iso')
        elif isinstance(data, pd.Timestamp):
            return 'ts-pandas://' + conv_time(data, to='iso')
        elif isinstance(data, datetime):
            return 'ts-dtime://' + conv_time(data, to='iso')
        else:
            raise Error("Unexpected. This should not be possible")
Ejemplo n.º 13
0
    def get_df(self,
               table,
               where=None,
               limit=None,
               orderby=None,
               descending=True,
               raw=False):
        """ Get data from communications database as a :py:class:`pandas.DataFrame` object

        Args:
            table (str):                    The table to fetch from
            where (str, optional):          SQL syntax to append to query as WHERE=...
            limit (int, optional):          Max number of rows to return
            orderby (str, optional):        The column to sort by
            descending (bool, optional):    If true: sort in descending order, otherwise sort in ascending order.
                                            Has no effect if orderby = None
            raw (bool, optional):           If true, do not decode anything in the database into native objects. Most
                                            data will be returned as strings with encoding flags intact.

        Returns:
            pandas.DataFrame: The query result (previously mis-documented as None).

        Raises:
            Error: If limit is given but is not an integer.
        """

        sql = "SELECT * from %s" % (table)

        if where is not None:
            sql += " WHERE {}".format(where)

        # Default sort column is the timestamp.
        if orderby is None:
            orderby = TSTAMP_LBL

        if descending:
            order = 'DESC'
        else:
            order = 'ASC'

        # NOTE(review): always true for boolean `descending`; kept so that
        # explicitly passing descending=None still skips the ORDER BY clause.
        if descending is not None:
            sql += " ORDER BY {} {}".format(orderby, order)

        if limit is not None:
            if isinstance(limit, int):
                sql += " LIMIT {}".format(limit)
            else:
                raise Error(
                    "Limit must be an integer, got {} of type {}".format(
                        limit, type(limit)))

        # We ideally need to convert the sql string to raw, but or pd.read_sql seems to fail for some reason
        # but I cant find an obvious way to do so, so lets just replace % with %% for now.
        # TODO: come up with a more general solution
        df = pd.read_sql(sql.replace(r'%', r'%%'), self._eng)

        if not raw:

            # TSTAMP_LBL is a floating point julian date. Convert it to datetime too if we arent requesting raw
            try:
                df[TSTAMP_LBL] = df[TSTAMP_LBL].apply(
                    lambda s: conv_time(s, to='datetime', float_t='julian'))
            except Exception as e:
                # Bug fix: the format string has two placeholders but was
                # given only the class name, raising IndexError inside the
                # handler; now the exception itself is passed as well.
                log.error(
                    "Failed to convert timestamp to datetime. {}:{}".format(
                        e.__class__.__name__, e))

            # Decode remaining fields. But do not touch column names
            # that start with '_' (internal/private)
            columns = [d for d in df.columns if d[0] != '_']
            df[columns] = df[columns].applymap(self._decode)

        return df
Ejemplo n.º 14
0
def main():
    """FCP driver (store-backed variant): parse/broadcast args, set up the
    temp workq directory and open-file limits, run the parallel copy,
    optionally verify + sign the dataset, then fix permissions and clean up.
    """
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    # Propagate parsed options into the global configuration object G.
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False
    G.memitem_threshold = args.item

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    # Recovery run: force overwrite and disable signature generation.
    if args.rid:
        G.resume = True
        args.force = True
        G.rid = args.rid
        args.signature = False # when recovery, no signature

    # Without an explicit checkpoint id, derive one from rank 0's timestamp
    # and broadcast it so all ranks agree.
    if not args.cpid:
        ts = utils.timestamp()
        args.cpid = MPI.COMM_WORLD.bcast(ts)

    # Per-run scratch directory for the on-disk work queue.
    G.tempdir = os.path.join(os.getcwd(),(".pcircle" + args.cpid))
    if not os.path.exists(G.tempdir):
        try:
            os.mkdir(G.tempdir)
        except OSError:
            pass

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()

    circle = Circle(dbname="fwalk")
    #circle.dbname = dbname

    global oflimit

    # Budget per-process open files from the rlimit, reserving 64 fds and
    # halving for safety; floor at 8.
    # NOTE(review): if num_of_hosts == 0 this block is skipped and `oflimit`
    # may be unbound at the comparison below — confirm a module-level
    # default exists.
    if num_of_hosts != 0:
        max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
        procs_per_host = circle.size // num_of_hosts
        oflimit = ((max_ofile - 64) // procs_per_host) // 2
    if oflimit < 8:
            oflimit = 8


    # Rank 0 prints the run banner with the effective parameters.
    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
            "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
            "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
            "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
            "Checkpoint ID:", "%s" % args.cpid))

        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Items in memory: ",
            " % r" % G.memitem_threshold, "|", "O file limit", "%s" % oflimit))
        #
        if args.verbosity > 0:
            print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))

    fcp_start()

    # Optional pause between copy and verification phases.
    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        circle = Circle(dbname="verify")
        pcheck = PVerify(circle, fcp, G.total_chunks, T.total_filesize, args.signature)
        circle.begin(pcheck)
        circle.finalize()
        tally = pcheck.fail_tally()
        # Broadcast the failure count so every rank takes the same branch.
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Verify result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Verify result:", "FAILED"))

        comm.Barrier()

        # Signature is only meaningful when verification passed.
        if args.signature and tally == 0:
            gen_signature(pcheck.bfsign, T.total_filesize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()
    if fcp:
        fcp.cleanup()
    #if circle:
    #    circle.finalize(cleanup=True)
    # Remove the scratch directory (best-effort; may be non-empty).
    comm.Barrier()
    if comm.rank == 0:
        try:
            os.rmdir(G.tempdir)
        except:
            pass
Ejemplo n.º 15
0
    def epilogue(self):
        """Print the fprof scan summary on rank 0 (Python 2.6 compatible)
        and optionally mirror the headline metrics to syslog.

        Returns:
            Tally.total_filesize: aggregate byte count of all files scanned.
        """
        self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            print("\nFprof epilogue:\n")
            # Python 2.6's str.format lacks the ',' thousands separator, so
            # fall back to a plain field on that interpreter.
            if py_version() != "py26":
                fmt_msg1 = "\t{0:<25}{1:<20,}"  # numeric
            else:  # 2.6 compat
                fmt_msg1 = "\t{0:<25}{1:<20}"  # numeric

            fmt_msg2 = "\t{0:<25}{1:<20}"  # string
            fmt_msg3 = "\t{0:<25}{1:<20.2f}"  # float
            print(fmt_msg1.format("Directory count:", Tally.total_dirs))
            print(fmt_msg1.format("Sym links count:", Tally.total_symlinks))
            print(
                fmt_msg1.format("Hard linked files:",
                                Tally.total_nlinked_files))
            print(fmt_msg1.format("File count:", Tally.total_files))
            print(fmt_msg1.format("Zero byte files:", Tally.total_0byte_files))
            print(fmt_msg1.format("Sparse files:", Tally.total_sparse))

            # Device-file stats only appear when device profiling was requested.
            if args.profdev:
                print(fmt_msg1.format("Dev file count:", Tally.devfile_cnt))
                print(
                    fmt_msg2.format("Dev file size:",
                                    bytes_fmt(Tally.devfile_sz)))
            print(fmt_msg1.format("Skipped count:", Tally.total_skipped))
            print(
                fmt_msg2.format("Total file size:",
                                bytes_fmt(Tally.total_filesize)))

            # Compression stats compare 512-byte allocated blocks against the
            # stat-reported logical size.
            # NOTE(review): assumes compressed > 0 and uncompressed > 0,
            # otherwise this divides by zero — confirm.
            if args.cpr:
                compressed = float(Tally.total_blocks * 512)
                uncompressed = float(Tally.total_stat_filesize)
                ratio = uncompressed / compressed
                saving = 1 - compressed / uncompressed
                print(fmt_msg3.format("Compression Ratio:", ratio))
                print(fmt_msg3.format("Compression Saving:", saving))

            if Tally.total_files != 0:
                print(
                    fmt_msg2.format(
                        "Avg file size:",
                        bytes_fmt(Tally.total_filesize /
                                  float(Tally.total_files))))
            print(fmt_msg1.format("Max files within dir:", Tally.max_files))
            elapsed_time = self.time_ended - self.time_started
            processing_rate = int(
                (Tally.total_files + Tally.total_dirs + Tally.total_symlinks +
                 Tally.total_skipped) / elapsed_time)
            print(
                fmt_msg2.format("Tree walk time:",
                                utils.conv_time(elapsed_time)))
            print(
                fmt_msg2.format("Scanning rate:",
                                str(processing_rate) + "/s"))
            print(fmt_msg2.format("Fprof loads:", Tally.taskloads))
            print("")

            # Push headline metrics to syslog when requested.
            if args.syslog:
                sendto_syslog("fprof.rootpath", "%s" % ",".join(G.src))
                sendto_syslog("fprof.version", "%s" % __version__)
                sendto_syslog("fprof.revid", "%s" % __revid__)
                sendto_syslog("fprof.dir_count", Tally.total_dirs)
                sendto_syslog("fprof.sym_count", Tally.total_symlinks)
                sendto_syslog("fprof.file_count", Tally.total_files)
                sendto_syslog("fprof.total_file_size",
                              bytes_fmt(Tally.total_filesize))
                if Tally.total_files > 0:
                    sendto_syslog(
                        "fprof.avg_file_size",
                        bytes_fmt(Tally.total_filesize /
                                  float(Tally.total_files)))
                sendto_syslog("fprof.walktime", utils.conv_time(elapsed_time))
                sendto_syslog("fprof.scan_rate", processing_rate)

        return Tally.total_filesize
Ejemplo n.º 16
0
 def format_time(t):
     """Format a timestamp-like value as 'YYYY-mm-dd HH:MM:SS'.

     Args:
         t: Any value accepted by conv_time.

     Returns:
         str or None: The formatted time, or None when conversion fails.
     """
     try:
         return conv_time(t, to='datetime').strftime('%Y-%m-%d %H:%M:%S')
     except Exception:
         # Narrowed from a bare except so KeyboardInterrupt/SystemExit
         # still propagate; any conversion failure yields None as before.
         return None