Example #1
0
    def __init__(self, circle, src, perfile=True):
        """Initialise the counters and bookkeeping used while profiling.

        circle  -- object whose ``rank`` labels this instance; kept as
                   ``self.circle``
        src     -- kept unchanged as ``self.src``
        perfile -- when true, open ``fprof-perfile.<rank>`` in the current
                   working directory for writing and create an empty
                   per-file size list; otherwise ``self.outfile`` is None
        """
        self.logger = utils.getLogger(__name__)

        self.d = {"rank": "rank %s" % circle.rank}
        self.circle, self.src = circle, src
        self.interval = 10  # progress report

        # hard-link bookkeeping
        self.nlinks = self.nlinked_files = 0

        # special file types
        self.pipes = self.sockets = self.sym_links = 0
        self.follow_sym_links = False

        # optional per-file output
        if not perfile:
            self.outfile = None
        else:
            perfile_path = os.path.join(
                os.getcwd(), "fprof-perfile.%s" % circle.rank)
            self.outfile = open(perfile_path, "w")
            self.fszlst = []    # store perfile size

        # directory / file tallies
        self.cnt_dirs = self.cnt_files = 0
        self.cnt_filesize = 0
        self.cnt_stat_filesize = 0  # uncompressed
        self.cnt_0byte = 0
        self.last_cnt = 0
        self.skipped = 0

        self.maxfiles = 0
        self.maxfiles_dir = None

        # device files: how many, and their total size
        self.devfile_cnt = self.devfile_sz = 0

        self.last_reduce_time = MPI.Wtime()
        self.sparse_cnt = 0
        self.cnt_blocks = 0

        # reduce
        self.reduce_items = 0

        # wall-clock bounds of the run; the end is filled in later
        self.time_started = MPI.Wtime()
        self.time_ended = None
Example #2
0
    def __init__(self, dbname, resume=False):
        """Open the SQLite database backing the work queue.

        dbname -- database path handed to ``sqlite3.connect``
        resume -- when false, the ``workq``, ``backup`` and ``checkpoint``
                  tables are dropped, recreated and seeded; when true,
                  ``self.recalibrate()`` is called instead

        Exits the process with status 1 if ``sqlite3.connect`` raises
        ``sqlite3.OperationalError``.
        """
        self.dbname = dbname
        self.conn = None
        self.logger = getLogger(__name__)

        # debug, dbstore doesn't have to be tied with rank.
        # so, it is set to be empty
        self.d = {"rank": ''}

        try:
            self.conn = sqlite3.connect(dbname)
        except sqlite3.OperationalError as e:
            self.logger.error(e, extra=self.d)
            sys.exit(1)
        self.cur = self.conn.cursor()
        self.resume = resume
        self.count = 0
        self.totalsz = 0
        self.fsize = 0
        self.qsize = 0
        self.workq = deque()
        self.workq_cnt = 0

        if not resume:
            self.conn.execute("DROP TABLE IF EXISTS workq")
            self.conn.execute("DROP TABLE IF EXISTS backup")
            self.conn.execute("DROP TABLE IF EXISTS checkpoint")
            self.conn.execute(
                "CREATE TABLE workq (id INTEGER PRIMARY KEY, work BLOB)")
            self.conn.execute(
                "CREATE TABLE backup (id INTEGER PRIMARY KEY, work BLOB)")
            self.conn.execute("CREATE TABLE checkpoint(qsize, fsize)")
            self.conn.execute("INSERT INTO checkpoint values(?, ?)",
                              (self.qsize, self.fsize))
            # sqlite3.Binary is memoryview on Python 3 and rejects str, so
            # the seed row is given as a bytes literal. b"0" stores the same
            # single byte that str(0) produced under Python 2.
            self.conn.execute("INSERT INTO backup (work) values(?)",
                              (sqlite3.Binary(b"0"), ))
            self.conn.commit()
        else:
            self.recalibrate()

        self.logger.debug("Connected to %s" % self.dbname, extra=self.d)
Example #3
0
    def __init__(self, dbname,
                 resume=False):
        """Connect to the SQLite file that stores the work queue.

        dbname -- path passed to ``sqlite3.connect``
        resume -- false: drop, recreate and seed the ``workq``, ``backup``
                  and ``checkpoint`` tables; true: call
                  ``self.recalibrate()`` and leave the tables alone

        Calls ``sys.exit(1)`` if ``sqlite3.connect`` raises
        ``sqlite3.OperationalError``.
        """
        self.dbname = dbname
        self.conn = None
        self.logger = getLogger(__name__)

        # debug, dbstore doesn't have to be tied with rank.
        # so, it is set to be empty
        self.d = {"rank": ''}

        try:
            self.conn = sqlite3.connect(dbname)
        except sqlite3.OperationalError as e:
            self.logger.error(e, extra=self.d)
            sys.exit(1)
        self.cur = self.conn.cursor()
        self.resume = resume
        self.count = 0
        self.totalsz = 0
        self.fsize = 0
        self.qsize = 0
        self.workq = deque()
        self.workq_cnt = 0

        if not resume:
            self.conn.execute("DROP TABLE IF EXISTS workq")
            self.conn.execute("DROP TABLE IF EXISTS backup")
            self.conn.execute("DROP TABLE IF EXISTS checkpoint")
            self.conn.execute("CREATE TABLE workq (id INTEGER PRIMARY KEY, work BLOB)")
            self.conn.execute("CREATE TABLE backup (id INTEGER PRIMARY KEY, work BLOB)")
            self.conn.execute("CREATE TABLE checkpoint(qsize, fsize)")
            self.conn.execute("INSERT INTO checkpoint values(?, ?)",
                              (self.qsize, self.fsize))
            # A bytes literal is required here: on Python 3 sqlite3.Binary
            # is memoryview and raises TypeError for a str argument. The
            # stored blob is the same single byte "0" as before.
            self.conn.execute("INSERT INTO backup (work) values(?)",
                              (sqlite3.Binary(b"0"),))
            self.conn.commit()
        else:
            self.recalibrate()

        self.logger.debug("Connected to %s" % self.dbname, extra=self.d)
Example #4
0
    def __init__(self, dbname):
        """Open ``dbname`` and start with an empty ``chksums`` table.

        dbname -- path passed to ``sqlite3.connect``

        Any existing ``chksums`` table is dropped and recreated with
        ``path`` and ``sha1`` text columns. If connecting or either
        statement raises ``sqlite3.OperationalError``, the error is logged
        and the process exits with status 1.
        """
        self.dbname = dbname
        self.conn = None
        self.logger = getLogger(__name__)
        self.blocks = 30000
        self._size = 0

        # debug, dbstore doesn't have to be tied with rank.
        # so, it is set to be empty
        self.d = {"rank": ''}

        schema_reset = (
            "DROP TABLE IF EXISTS chksums",
            "CREATE TABLE chksums (path TEXT, sha1 TEXT)",
        )
        try:
            self.conn = sqlite3.connect(dbname)
            for statement in schema_reset:
                self.conn.execute(statement)
            self.conn.commit()
        except sqlite3.OperationalError as err:
            self.logger.error(err, extra=self.d)
            sys.exit(1)

        self.cur = self.conn.cursor()
Example #5
0
    def __init__(self, name="Circle", split="equal", k=2, dbname=None, resume=False):
        """Initialise per-rank circle state.

        Sets up communicator info, request/reduction/barrier bookkeeping,
        the temporary directory, the token, the k-ary rank tree and the
        work queue.

        name   -- name applied to ``MPI.COMM_WORLD`` via ``Set_name``
        split  -- stored as ``self.split``
        k      -- arity of the rank tree used to derive parent/child ranks
        dbname -- stored, and passed to ``workq_init`` when ``G.use_store``
        resume -- stored, and passed to ``workq_init`` when ``G.use_store``
        """
        random.seed()  # use system time to seed
        self.comm = MPI.COMM_WORLD
        self.comm.Set_name(name)
        self.size = self.comm.Get_size()
        self.rank = self.comm.Get_rank()
        self.d = {"rank": "rank %s" % self.rank}
        self.logger = getLogger(__name__)

        self.useStore = G.use_store
        self.split = split
        self.dbname = dbname
        self.resume = resume
        self.reduce_time_interval = G.reduce_interval

        self.task = None
        self.abort = False
        self.requestors = []

        # workq buffer
        if self.useStore:
            self.workq_buf = deque()

        # counters
        self.work_requested = 0
        self.work_processed = 0
        self.work_request_received = 0
        self.workreq_outstanding = False
        self.workreq_rank = None

        # reduction
        self.reduce_enabled = True
        self.reduce_time_last = MPI.Wtime()
        self.reduce_outstanding = False
        self.reduce_replies = 0
        self.reduce_buf = {}
        self.reduce_status = None

        # barriers
        self.barrier_started = False
        self.barrier_up = False  # flag to indicate barrier sent to parent
        self.barrier_replies = 0

        self.workdir = os.getcwd()
        self.tempdir = os.path.join(self.workdir, ".pcircle")
        if not os.path.exists(self.tempdir):
            try:
                os.mkdir(self.tempdir)
            except OSError:
                # best effort: the directory may already have been created
                # between the exists() check and mkdir()
                pass

        # token
        self.token = Token(self)

        # tree init
        self.k = k
        self.parent_rank = MPI.PROC_NULL
        self.child_ranks = []  # [MPI.PROC_NULL] * k is too much C
        self.children = 0
        # compute rank of parent if we have one
        if self.rank > 0:
            # floor division keeps the parent rank an int on Python 3 as
            # well; "/" would produce a float there
            self.parent_rank = (self.rank - 1) // k

        # identify ranks of what would be leftmost and rightmost children
        left = self.rank * k + 1
        right = self.rank * k + k

        # if we have at least one child
        # compute number of children and list of child ranks
        if left < self.size:
            # adjust right child in case we don't have a full set of k
            if right >= self.size:
                right = self.size - 1
            # compute number of children and the list
            self.children = right - left + 1
            self.child_ranks.extend(range(left, left + self.children))

        self.logger.debug("parent: %s, children: %s" % (self.parent_rank, self.child_ranks),
                          extra=self.d)

        # workq init
        # TODO: compare list vs. deque
        if G.use_store:
            self.workq_init(dbname, resume)
        else:
            self.workq = []

        self.logger.debug("Circle initialized", extra=self.d)
Example #6
0
    def __init__(self,
                 name="Circle",
                 split="equal",
                 k=2,
                 dbname=None,
                 resume=False):
        """Build the per-rank state of a circle.

        name   -- name given to ``MPI.COMM_WORLD`` through ``Set_name``
        split  -- kept as ``self.split``
        k      -- arity of the rank tree (parent and child ranks)
        dbname -- kept as ``self.dbname``; handed to ``workq_init`` when
                  ``G.resume`` is set
        resume -- kept as ``self.resume``
        """
        random.seed()  # use system time to seed

        # communicator and process identity
        self.comm = MPI.COMM_WORLD
        self.comm.Set_name(name)
        self.size = self.comm.Get_size()
        self.rank = self.comm.Get_rank()
        self.host = MPI.Get_processor_name()
        self.pid = os.getpid()

        self.d = {"rank": "rank %s" % self.rank}
        self.logger = getLogger(__name__)

        # configuration
        self.split = split
        self.dbname = dbname
        self.resume = resume
        self.reduce_time_interval = G.reduce_interval

        self.task = None
        self.abort = False
        self.requestors = []

        # work-request counters
        self.work_requested = self.work_processed = 0
        self.work_request_received = 0
        self.workreq_outstanding = False
        self.workreq_rank = None

        # reduction state
        self.reduce_enabled = False
        self.reduce_time_last = MPI.Wtime()
        self.reduce_outstanding = False
        self.reduce_replies = 0
        self.reduce_buf = {}
        self.reduce_status = None

        # periodic report state
        self.report_enabled = False
        self.report_interval = 60
        self.report_last = MPI.Wtime()
        self.report_processed = 0

        # barrier state
        self.barrier_started = False
        self.barrier_up = False  # flag to indicate barrier sent to parent
        self.barrier_replies = 0

        self.workdir = os.getcwd()
        if not G.tempdir:
            # every rank proposes a name, then adopts the broadcast value
            stamped = ".pcircle" + utils.timestamp()
            G.tempdir = os.path.join(os.getcwd(), stamped)
            G.tempdir = self.comm.bcast(G.tempdir)

        if not os.path.exists(G.tempdir):
            try:
                os.mkdir(G.tempdir)
            except OSError:
                pass

        # token
        self.token = Token(self)

        # k-ary tree: parent is PROC_NULL for rank 0
        self.k = k
        if self.rank > 0:
            self.parent_rank = (self.rank - 1) // k
        else:
            self.parent_rank = MPI.PROC_NULL

        # leftmost child, and rightmost child clipped to the last rank
        first_child = self.rank * k + 1
        last_child = min(self.rank * k + k, self.size - 1)

        if first_child < self.size:
            self.children = last_child - first_child + 1
            self.child_ranks = list(range(first_child, last_child + 1))
        else:
            # no children at all
            self.children = 0
            self.child_ranks = []

        tree_info = (self.parent_rank, self.child_ranks)
        self.logger.debug("parent: %s, children: %s" % tree_info,
                          extra=self.d)

        # work queues
        # TODO: compare list vs. deque
        # two in-memory deques: workq, and workq_buf (used when pushing to
        # or retrieving from the database)
        self.workq = deque()
        self.workq_buf = deque()
        # flag that indicates database is used for workq
        self.use_store = False

        if G.resume:
            self.workq_init(self.dbname, G.resume)

        self.logger.debug("Circle initialized", extra=self.d)
Example #7
0
__author__ = 'f7b'

from mpi4py import MPI
from pcircle.globals import G, T
from pcircle.utils import getLogger

# module variables
# Module-level logger, created with pcircle.utils.getLogger under this
# module's name.
log = getLogger(__name__)


def colorstr(c):
    """Return the lowercase name of color constant ``c``.

    ``c`` is compared, in order, with ``G.BLACK``, ``G.WHITE`` and
    ``G.TERMINATE``; the first match gives "black", "white" or
    "terminate". Any other value gives "unknown".
    """
    labelled = (
        ("BLACK", "black"),
        ("WHITE", "white"),
        ("TERMINATE", "terminate"),
    )
    for attr, label in labelled:
        # look each constant up only when it is reached
        if c == getattr(G, attr):
            return label
    return "unknown"


class Token:

    def __init__(self, circle):
        """ The Token protocol

        src: the rank that sends the token to this process
        dest: the rank this process sends the token to next
        color: current color of the token
        proc: current color of this process (black, white, terminate)
        send_req: request associated with a pending receive
            (NOTE(review): the name suggests a send request -- confirm)
        """
Example #8
0
__author__ = 'f7b'

from mpi4py import MPI
from pcircle.globals import G, T
from pcircle.utils import getLogger

# module variables
# Logger shared by this module; obtained from pcircle.utils.getLogger
# using the module's own name.
log = getLogger(__name__)


def colorstr(c):
    """Map a color constant to its printable name.

    Returns "black", "white" or "terminate" when ``c`` equals
    ``G.BLACK``, ``G.WHITE`` or ``G.TERMINATE`` respectively (checked in
    that order), and "unknown" for anything else.
    """
    if c == G.BLACK:
        return "black"
    if c == G.WHITE:
        return "white"
    if c == G.TERMINATE:
        return "terminate"
    return "unknown"


class Token:
    def __init__(self, circle):
        """ The Token protocol
        src: which rank send token to me
        dest: which rank to send token to next
        color: current token color
        proc: current color of process (black, white, terminate)
        send_req: request associating with pending receive
        """
        self.circle = circle