def __init__(self, circle, src, perfile=True):
    """Set up the per-rank profiling counters and optional per-file output.

    Args:
        circle: object exposing ``rank``; kept as ``self.circle``.
        src: kept unchanged as ``self.src``.
        perfile: when true, ``fprof-perfile.<rank>`` is opened for writing
            in the current working directory and ``self.fszlst`` is
            created; when false, ``self.outfile`` is ``None`` and
            ``self.fszlst`` is not defined.
    """
    self.logger = utils.getLogger(__name__)
    self.d = {"rank": "rank %s" % circle.rank}
    self.circle = circle
    self.src = src
    self.interval = 10  # progress report

    # hard links
    self.nlinks = 0
    self.nlinked_files = 0

    # other special entries
    self.pipes = 0
    self.sockets = 0
    self.sym_links = 0
    self.follow_sym_links = False

    # Per-file output is optional: start from "no output file" and only
    # open one (and create the size list) when it was asked for.
    self.outfile = None
    if perfile:
        perfile_path = os.path.join(
            os.getcwd(), "fprof-perfile.%s" % circle.rank)
        self.outfile = open(perfile_path, "w")
        self.fszlst = []  # store perfile size

    # running totals
    self.cnt_dirs = 0
    self.cnt_files = 0
    self.cnt_filesize = 0
    self.cnt_stat_filesize = 0  # uncompressed
    self.cnt_0byte = 0
    self.last_cnt = 0
    self.skipped = 0
    self.maxfiles = 0
    self.maxfiles_dir = None
    self.devfile_cnt = 0  # track # of dev files
    self.devfile_sz = 0  # track size of dev files
    self.last_reduce_time = MPI.Wtime()
    self.sparse_cnt = 0
    self.cnt_blocks = 0

    # reduce
    self.reduce_items = 0

    # wall-clock bookkeeping; time_ended stays None until it is set later
    self.time_started = MPI.Wtime()
    self.time_ended = None
def __init__(self, dbname, resume=False):
    """Open the SQLite-backed work-queue store.

    Args:
        dbname: database path handed to ``sqlite3.connect``.
        resume: when false, the ``workq``, ``backup`` and ``checkpoint``
            tables are dropped and recreated and seed rows are inserted;
            when true, the existing database is kept and
            ``self.recalibrate()`` is called instead.

    Raises:
        SystemExit: with status 1 when connecting raises
            ``sqlite3.OperationalError`` (the error is logged first).
    """
    self.dbname = dbname
    self.conn = None
    self.logger = getLogger(__name__)

    # debug, dbstore doesn't have to be tied with rank,
    # so the rank field is set to be empty
    self.d = {"rank": ''}

    try:
        self.conn = sqlite3.connect(dbname)
    except sqlite3.OperationalError as e:
        self.logger.error(e, extra=self.d)
        sys.exit(1)

    self.cur = self.conn.cursor()
    self.resume = resume
    self.count = 0
    self.totalsz = 0
    self.fsize = 0
    self.qsize = 0
    self.workq = deque()
    self.workq_cnt = 0

    if not resume:
        self.conn.execute("DROP TABLE IF EXISTS workq")
        self.conn.execute("DROP TABLE IF EXISTS backup")
        self.conn.execute("DROP TABLE IF EXISTS checkpoint")
        self.conn.execute(
            "CREATE TABLE workq (id INTEGER PRIMARY KEY, work BLOB)")
        self.conn.execute(
            "CREATE TABLE backup (id INTEGER PRIMARY KEY, work BLOB)")
        self.conn.execute("CREATE TABLE checkpoint(qsize, fsize)")
        self.conn.execute("INSERT INTO checkpoint values(?, ?)",
                          (self.qsize, self.fsize))
        # sqlite3.Binary is memoryview on Python 3 and rejects str, so the
        # seed blob is given as a bytes literal; on Python 2 b"0" is the
        # same value str(0) produced.
        self.conn.execute("INSERT INTO backup (work) values(?)",
                          (sqlite3.Binary(b"0"), ))
        self.conn.commit()
    else:
        self.recalibrate()

    self.logger.debug("Connected to %s" % self.dbname, extra=self.d)
def __init__(self, dbname, resume=False):
    """Connect to the SQLite work-queue database and prepare its tables.

    Args:
        dbname: database path passed to ``sqlite3.connect``.
        resume: if false, ``workq``, ``backup`` and ``checkpoint`` are
            dropped, recreated and seeded; if true, the tables are left
            alone and ``self.recalibrate()`` is called.

    Raises:
        SystemExit: with status 1 if ``sqlite3.connect`` raises
            ``sqlite3.OperationalError``; the error is logged beforehand.
    """
    self.dbname = dbname
    self.conn = None
    self.logger = getLogger(__name__)

    # debug, dbstore doesn't have to be tied with rank,
    # so the rank field is set to be empty
    self.d = {"rank": ''}

    try:
        self.conn = sqlite3.connect(dbname)
    except sqlite3.OperationalError as e:
        self.logger.error(e, extra=self.d)
        sys.exit(1)

    self.cur = self.conn.cursor()
    self.resume = resume
    self.count = 0
    self.totalsz = 0
    self.fsize = 0
    self.qsize = 0
    self.workq = deque()
    self.workq_cnt = 0

    if resume:
        self.recalibrate()
    else:
        # Start from a clean schema: drop whatever is there, then recreate.
        for table in ("workq", "backup", "checkpoint"):
            self.conn.execute("DROP TABLE IF EXISTS %s" % table)
        self.conn.execute("CREATE TABLE workq (id INTEGER PRIMARY KEY, work BLOB)")
        self.conn.execute("CREATE TABLE backup (id INTEGER PRIMARY KEY, work BLOB)")
        self.conn.execute("CREATE TABLE checkpoint(qsize, fsize)")
        self.conn.execute("INSERT INTO checkpoint values(?, ?)", (self.qsize, self.fsize))
        # The seed blob must be bytes: sqlite3.Binary is memoryview on
        # Python 3 and raises TypeError for str. b"0" equals the old
        # str(0) on Python 2, so the stored value is unchanged there.
        self.conn.execute("INSERT INTO backup (work) values(?)", (sqlite3.Binary(b"0"),))
        self.conn.commit()

    self.logger.debug("Connected to %s" % self.dbname, extra=self.d)
def __init__(self, dbname):
    """Open the checksum database and recreate its ``chksums`` table.

    Args:
        dbname: database path passed to ``sqlite3.connect``.

    Raises:
        SystemExit: with status 1 if connecting or creating the table
            raises ``sqlite3.OperationalError`` (the error is logged first).
    """
    self.dbname = dbname
    self.conn = None
    self.logger = getLogger(__name__)
    self.blocks = 30000
    self._size = 0

    # debug, dbstore doesn't have to be tied with rank,
    # so the rank field is set to be empty
    self.d = {"rank": ''}

    # Statements run in order on every start: any previous table is dropped.
    setup_statements = (
        "DROP TABLE IF EXISTS chksums",
        "CREATE TABLE chksums (path TEXT, sha1 TEXT)",
    )

    try:
        self.conn = sqlite3.connect(dbname)
        for statement in setup_statements:
            self.conn.execute(statement)
        self.conn.commit()
    except sqlite3.OperationalError as err:
        self.logger.error(err, extra=self.d)
        sys.exit(1)

    self.cur = self.conn.cursor()
def __init__(self, name="Circle", split="equal", k=2, dbname=None, resume=False):
    """Initialise this rank's state for the work circle.

    Args:
        name: name given to ``MPI.COMM_WORLD`` via ``Set_name``.
        split: stored as ``self.split``.
        k: arity of the rank tree used to derive parent and child ranks.
        dbname: stored, and passed to ``workq_init`` when ``G.use_store``
            is set.
        resume: stored, and passed to ``workq_init`` when ``G.use_store``
            is set.
    """
    random.seed()  # use system time to seed
    self.comm = MPI.COMM_WORLD
    self.comm.Set_name(name)
    self.size = self.comm.Get_size()
    self.rank = self.comm.Get_rank()
    self.d = {"rank": "rank %s" % self.rank}
    self.logger = getLogger(__name__)

    self.useStore = G.use_store
    self.split = split
    self.dbname = dbname
    self.resume = resume
    self.reduce_time_interval = G.reduce_interval

    self.task = None
    self.abort = False
    self.requestors = []

    # workq buffer
    if self.useStore:
        self.workq_buf = deque()

    # counters
    self.work_requested = 0
    self.work_processed = 0
    self.work_request_received = 0
    self.workreq_outstanding = False
    self.workreq_rank = None

    # reduction
    self.reduce_enabled = True
    self.reduce_time_last = MPI.Wtime()
    self.reduce_outstanding = False
    self.reduce_replies = 0
    self.reduce_buf = {}
    self.reduce_status = None

    # barriers
    self.barrier_started = False
    self.barrier_up = False  # flag to indicate barrier sent to parent
    self.barrier_replies = 0

    self.workdir = os.getcwd()
    self.tempdir = os.path.join(self.workdir, ".pcircle")
    if not os.path.exists(self.tempdir):
        try:
            os.mkdir(self.tempdir)
        except OSError:
            # Best effort, as in the original: a failed mkdir is ignored
            # (presumably another rank created the directory first).
            pass

    # token
    self.token = Token(self)

    # tree init
    self.k = k
    self.parent_rank = MPI.PROC_NULL
    self.child_ranks = []  # [MPI.PROC_NULL] * k is too much C
    self.children = 0

    # compute rank of parent if we have one
    if self.rank > 0:
        # Floor division: "/" would produce a float rank on Python 3.
        self.parent_rank = (self.rank - 1) // k

    # identify ranks of what would be leftmost and rightmost children
    left = self.rank * k + 1
    right = self.rank * k + k

    # if we have at least one child,
    # compute number of children and list of child ranks
    if left < self.size:
        # adjust right child in case we don't have a full set of k
        right = min(right, self.size - 1)
        self.children = right - left + 1
        self.child_ranks.extend(range(left, right + 1))

    self.logger.debug("parent: %s, children: %s" % (self.parent_rank, self.child_ranks),
                      extra=self.d)

    # workq init
    # TODO: compare list vs. deque
    if G.use_store:
        self.workq_init(dbname, resume)
    else:
        self.workq = []

    self.logger.debug("Circle initialized", extra=self.d)
def __init__(self, name="Circle", split="equal", k=2, dbname=None, resume=False):
    """Initialise this rank's state for the work circle.

    Args:
        name: name given to ``MPI.COMM_WORLD`` via ``Set_name``.
        split: stored as ``self.split``.
        k: arity of the rank tree used to derive parent and child ranks.
        dbname: stored as ``self.dbname``; passed to ``workq_init`` when
            ``G.resume`` is set.
        resume: stored as ``self.resume``. Note that the decision to call
            ``workq_init`` is made on ``G.resume``, not on this argument.
    """
    random.seed()  # use system time to seed

    # communicator and identity of this process
    comm = MPI.COMM_WORLD
    self.comm = comm
    comm.Set_name(name)
    self.size = comm.Get_size()
    self.rank = comm.Get_rank()
    self.host = MPI.Get_processor_name()
    self.pid = os.getpid()
    self.d = {"rank": "rank %s" % self.rank}
    self.logger = getLogger(__name__)

    self.split = split
    self.dbname = dbname
    self.resume = resume
    self.reduce_time_interval = G.reduce_interval

    self.task = None
    self.abort = False
    self.requestors = []

    # counters
    self.work_requested = 0
    self.work_processed = 0
    self.work_request_received = 0
    self.workreq_outstanding = False
    self.workreq_rank = None

    # reduction
    self.reduce_enabled = False
    self.reduce_time_last = MPI.Wtime()
    self.reduce_outstanding = False
    self.reduce_replies = 0
    self.reduce_buf = {}
    self.reduce_status = None

    # periodic report
    self.report_enabled = False
    self.report_interval = 60
    self.report_last = MPI.Wtime()
    self.report_processed = 0

    # barriers
    self.barrier_started = False
    self.barrier_up = False  # flag to indicate barrier sent to parent
    self.barrier_replies = 0

    self.workdir = os.getcwd()

    # Pick a temp directory name if none is set yet, then take whatever
    # value the broadcast returns so G.tempdir comes from comm.bcast.
    if not G.tempdir:
        G.tempdir = os.path.join(os.getcwd(), (".pcircle" + utils.timestamp()))
    G.tempdir = comm.bcast(G.tempdir)

    if not os.path.exists(G.tempdir):
        try:
            os.mkdir(G.tempdir)
        except OSError:
            pass

    # token
    self.token = Token(self)

    # tree init
    self.k = k
    self.child_ranks = []  # [MPI.PROC_NULL] * k is too much C
    self.children = 0

    # rank 0 has no parent; every other rank derives it from k
    if self.rank > 0:
        self.parent_rank = (self.rank - 1) // k
    else:
        self.parent_rank = MPI.PROC_NULL

    # ranks of what would be the leftmost and rightmost children
    first_child = self.rank * k + 1
    last_child = self.rank * k + k

    # only ranks whose leftmost child exists have any children
    if first_child < self.size:
        # clip the rightmost child when the last level is not full
        last_child = min(last_child, self.size - 1)
        self.children = last_child - first_child + 1
        self.child_ranks.extend(range(first_child, last_child + 1))

    self.logger.debug("parent: %s, children: %s" % (self.parent_rank, self.child_ranks),
                      extra=self.d)

    # workq init
    # TODO: compare list vs. deque
    # Two in-memory queues are created here: workq, and workq_buf, which
    # is used when pushing to or retrieving from the database.
    self.workq = deque()
    self.workq_buf = deque()

    # flag that indicates database is used for workq
    self.use_store = False

    if G.resume:
        self.workq_init(self.dbname, G.resume)

    self.logger.debug("Circle initialized", extra=self.d)
__author__ = 'f7b'

from mpi4py import MPI

from pcircle.globals import G, T
from pcircle.utils import getLogger

# module variables
log = getLogger(__name__)


def colorstr(c):
    """Return the lowercase name of token color ``c``.

    ``c`` is compared against ``G.BLACK``, ``G.WHITE`` and ``G.TERMINATE``
    in that order; anything else yields ``"unknown"``.
    """
    if c == G.BLACK:
        return "black"
    if c == G.WHITE:
        return "white"
    if c == G.TERMINATE:
        return "terminate"
    return "unknown"


class Token:
    def __init__(self, circle):
        """
        The Token protocol

        src: the rank that sends the token to me
        dest: the rank to send the token to next
        color: current token color
        proc: current color of the process (black, white, terminate)
        send_req: request associated with the pending receive
        """
__author__ = 'f7b'

from mpi4py import MPI

from pcircle.globals import G, T
from pcircle.utils import getLogger

# module variables
log = getLogger(__name__)


def colorstr(c):
    """Return the lowercase name of token color ``c``.

    ``c`` is compared against ``G.BLACK``, ``G.WHITE`` and ``G.TERMINATE``
    in that order; anything else yields ``"unknown"``.
    """
    if c == G.BLACK:
        return "black"
    if c == G.WHITE:
        return "white"
    if c == G.TERMINATE:
        return "terminate"
    return "unknown"


class Token:
    def __init__(self, circle):
        """
        The Token protocol

        src: the rank that sends the token to me
        dest: the rank to send the token to next
        color: current token color
        proc: current color of the process (black, white, terminate)
        send_req: request associated with the pending receive
        """
        self.circle = circle