def start_workers(self):
    global first_try
    timer.start('work.start.workers')
    started_worker_cnt = 0
    slots = self.wq.hungry()
    if slots == 100:
        slots = 20
    #if slots>0 and slots<25:
    #    slots = 25
    batch = db.task_claim(slots)
    if batch:
        sys.stdout.write('.')
        sys.stdout.flush()
        calls = db.task_get(batch)
        for call in calls:
            self.execute(call)
            started_worker_cnt += 1
    elif len(self.workers) == 0 and db.task_cnt() == 0:
        time.sleep(5)
        sys.stdout.write(',')
        sys.stdout.flush()
        timer.report()
        return -1
        #sys.exit(0)
    self.total_tasks += started_worker_cnt
    timer.stop('work.start.workers')
    return started_worker_cnt
def finish_workers(self):
    timer.start('work.finish.workers')
    finished_worker_cnt = 0
    t = self.wq.wait(10)
    if t:
        worker = self.workers[t.id]
        print "WQ execution (%s) completed in %s: %s (return code %d)" % (
            worker.call.cbid, worker.sandbox, worker.call.body['cmd'], t.return_status)
        if t.return_status != 0:
            # Failed task: record it, possibly blacklist the host, and resubmit the call.
            self.fails.append(worker.call)
            if worker.debug(t):
                self.wq.blacklist(t.host)
            node = self.db.fetch(worker.call.key)
            self.execute(node.obj)
        else:
            worker.finish()
            finished_worker_cnt += 1  # count the completed worker so the caller can tell work was done
            self.worker_cnt -= 1
            del self.workers[t.id]
            db.task_del(worker.call.cbid)
    timer.stop('work.finish.workers')
    return finished_worker_cnt
def update_b3(self, c, hypothetical=False):
    timer.start("update b3")
    if len(c) == 1:
        self.p_den -= 1
        self.p_num -= self.ps[c[0]]
        self.r_num -= self.rs[c[0]]
        self.ps[c[0]] = 0
        self.rs[c[0]] = 0
    else:
        intersect_counts = Counter()
        for m in c:
            if m in self.mention_to_gold:
                intersect_counts[self.mention_to_gold[m]] += 1
        for m in c:
            if m in self.mention_to_gold:
                self.p_num -= self.ps[m]
                self.r_num -= self.rs[m]
                g = self.mention_to_gold[m]
                ic = intersect_counts[g]
                self.p_num += ic / float(len(c))
                self.r_num += ic / float(len(g))
                if not hypothetical:
                    self.ps[m] = ic / float(len(c))
                    self.rs[m] = ic / float(len(g))
    timer.stop("update b3")
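# For reference, assuming the standard Bagga & Baldwin B^3 definition, which the
# incremental update above appears to maintain:
#   P_B3 = (1 / N_p) * sum_m |C_m ∩ G_m| / |C_m|
#   R_B3 = (1 / N_r) * sum_m |C_m ∩ G_m| / |G_m|
# where C_m is the predicted cluster containing mention m and G_m its gold cluster.
# In the code, p_num / r_num accumulate the per-mention terms (ic / len(c) and
# ic / len(g)) and p_den / r_den hold the mention counts used as N_p and N_r.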
def link(self, m1, m2, hypothetical=False, beta=1):
    timer.start("link")
    if m1 == -1:
        return self.get_f1(beta=beta) if hypothetical else None
    c1, c2 = self.mention_to_cluster[m1], self.mention_to_cluster[m2]
    assert c1 != c2
    new_c = c1 + c2
    p_num, r_num, p_den, r_den = self.p_num, self.r_num, self.p_den, self.r_den
    if len(c1) == 1:
        self.p_den += 1
    if len(c2) == 1:
        self.p_den += 1
    self.update_b3(new_c, hypothetical=hypothetical)
    if hypothetical:
        f1 = evaluation.f1(self.p_num, self.p_den, self.r_num, self.r_den, beta=beta)
        self.p_num, self.r_num, self.p_den, self.r_den = p_num, r_num, p_den, r_den
        timer.stop("link")
        return f1
    else:
        self.ana_to_ant[m2] = m1
        self.ant_to_anas[m1].append(m2)
        self.clusters.remove(c1)
        self.clusters.remove(c2)
        self.clusters.append(new_c)
        for m in new_c:
            self.mention_to_cluster[m] = new_c
        timer.stop("link")
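# evaluation.f1 is not shown in this file; assuming it computes the usual F_beta
# from the four accumulators,
#   P = p_num / p_den,   R = r_num / r_den,
#   F_beta = (1 + beta^2) * P * R / (beta^2 * P + R)
# the hypothetical branch above returns the document F_beta that linking m1 and m2
# would produce, then restores the saved accumulators.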
def unlink(self, m):
    timer.start("unlink")
    old_ant = self.ana_to_ant[m]
    if old_ant != -1:
        self.ana_to_ant[m] = -1
        self.ant_to_anas[old_ant].remove(m)
        old_c = self.mention_to_cluster[m]
        c1 = [m]
        frontier = self.ant_to_anas[m][:]
        while len(frontier) > 0:
            m = frontier.pop()
            c1.append(m)
            frontier += self.ant_to_anas[m]
        c1 = tuple(c1)
        c2 = tuple(m for m in old_c if m not in c1)
        self.update_b3(c1)
        self.update_b3(c2)
        self.clusters.remove(old_c)
        self.clusters.append(c1)
        self.clusters.append(c2)
        for m in c1:
            self.mention_to_cluster[m] = c1
        for m in c2:
            self.mention_to_cluster[m] = c2
    timer.stop("unlink")
def run_agent(self, s, beta=0, iteration=1):
    timer.start("running agent")
    merged_pairs = []
    while not s.is_complete():
        example = s.get_example(self.training)
        n_candidates = example['starts'].size + 1
        if self.training:
            self.replay_memory.update(example)
        if random.random() > beta:
            if iteration == -1:
                i = n_candidates - 1
            else:
                timer.start("predict")
                scores = self.model.predict_on_batch(example)[0]
                if self.training:
                    self.loss_aggregator.update(np.sum(scores * example['costs']))
                i = np.argmax(scores[:, 0])
                timer.stop("predict")
        else:
            i = np.argmin(example['costs'][:, 0])
        if i != n_candidates - 1:
            merged_pairs.append((s.candidate_antecedents[i], s.current_mention))
        s.do_action(i)
    timer.stop("running agent")
    return merged_pairs
def task_prep(self, item):
    calls = []
    timer.start('db.task.prep')
    while True:
        try:
            conn, log = (self.tconn, self.tlog)
            with conn:
                # Check if task is already queued
                curs = conn.cursor()
                curs.execute('SELECT cbid FROM todos WHERE next_arg IN (?,?)',
                             (item.cbid, item.dbid,))
                res = curs.fetchall()
                for r in res:
                    call = self.find_one(r['cbid'])
                    if call:
                        # Update next_arg for task
                        self.task_update(call)
        except sqlite3.OperationalError:
            print 'Database (todos) is locked on task_prep'
            time.sleep(1)
            continue
        break
    timer.stop('db.task.prep')
    return calls
def hashstring(str):
    timer.start('utils.hashstring')
    key = hashlib.sha1()
    key.update(str)
    key = key.hexdigest()
    timer.stop('utils.hashstring')
    return key
def start_workers(self):
    global first_try
    timer.start('work.start.workers')
    started_worker_cnt = 0
    slots = glob.exec_local_concurrency - len(self.workers)
    batch = db.task_claim(slots)
    sys.stdout.write('.')
    sys.stdout.flush()
    if batch:
        calls = db.task_get(batch)
        for call in calls:
            self.execute(call)
            started_worker_cnt += 1
    elif len(self.workers) == 0 and db.task_remain(glob.workflow_id) == 0 and self.total_tasks > 0:
        timer.report()
        return -2
        #sys.exit(0)
    if first_try:
        if started_worker_cnt == 0:
            print 'Nothing to execute.'
            timer.report()
            return -1
            #sys.exit(0)
        first_try = False
    self.total_tasks += started_worker_cnt
    timer.stop('work.start.workers')
    return started_worker_cnt
def finish_workers(self):
    timer.start('work.finish.workers')
    finished_worker_cnt = 0
    # Iterate over a copy so finished workers can be removed from self.workers safely.
    for worker in self.workers[:]:
        if worker.process.poll() is not None:
            (stdout, stderr) = worker.process.communicate()
            print "\nLocal execution complete (%s): %s (return code %d)" % (
                worker.call.cbid, worker.call.body['cmd'], worker.process.returncode)
            finished_worker_cnt += 1
            if worker.process.returncode != 0:
                if len(stdout) > 0:
                    d('exec', 'stdout:\n', stdout)
                if len(stderr) > 0:
                    d('exec', 'stderr:\n', stderr)
            else:
                if len(stdout) > 0:
                    print 'stdout:\n', stdout
                if len(stderr) > 0:
                    print 'stderr:\n', stderr
            worker.finish()
            self.workers.remove(worker)
            db.task_del(worker.call.cbid)
    timer.stop('work.finish.workers')
    return finished_worker_cnt
def task_get(self, batch):
    calls = []
    timer.start('db.task.get')
    while True:
        try:
            conn, log = (self.tconn, self.tlog)
            with conn:
                curs = conn.cursor()
                curs.execute('SELECT cbid FROM todos WHERE assigned=?', (batch,))
                res = curs.fetchall()
                for r in res:
                    call = self.find_one(r['cbid'])
                    if call:
                        calls.append(call)
        except sqlite3.OperationalError:
            print 'Database (todos) is locked on task_get'
            time.sleep(1)
            continue
        break
    timer.stop('db.task.get')
    return calls
def connect(self):
    global dlog, clog
    timer.start('db.connect')
    self.dconn, dlog = self.sqlite3_connect(glob.data_db_pathname, glob.data_log_pathname,
                                            'data_db_logger', glob.data_file_directory)
    self.cconn, clog = self.sqlite3_connect(glob.cache_db_pathname, glob.cache_log_pathname,
                                            'cache_db_logger', glob.cache_file_directory)
    self.trconn, clog = self.sqlite3_connect(glob.trash_db_pathname, glob.trash_log_pathname,
                                             'trash_db_logger', glob.trash_file_directory)
    self.tconn, self.tlog = self.sqlite3_connect(glob.work_db_pathname, glob.work_log_pathname)

    db_init_str = """
    CREATE TABLE IF NOT EXISTS items (
        id INTEGER NOT NULL,
        type TEXT,
        cbid TEXT,
        dbid TEXT,
        wfid UUID,
        step TEXT,
        "when" FLOAT,
        meta TEXT,
        body TEXT,
        repo UUID,
        path TEXT,
        size INTEGER,
        PRIMARY KEY (id)
    );
    """
    db_init_todos = """
    CREATE TABLE IF NOT EXISTS todos (
        id INTEGER NOT NULL,
        cbid TEXT,
        wfid UUID,
        step TEXT,
        priority INTEGER DEFAULT 0,
        next_arg TEXT,
        assigned TEXT,
        failures INTEGER DEFAULT 0,
        PRIMARY KEY (id)
    );
    """

    # The data, cache, and trash stores all share the same items schema and indexes.
    for conn in (self.dconn, self.cconn, self.trconn):
        curs = conn.cursor()
        curs.execute(db_init_str)
        curs.execute('CREATE INDEX IF NOT EXISTS itcbids ON items(cbid);')
        curs.execute('CREATE INDEX IF NOT EXISTS itdbids ON items(dbid);')
        conn.commit()

    curs = self.tconn.cursor()
    curs.execute(db_init_todos)
    curs.execute('CREATE INDEX IF NOT EXISTS tdnext ON todos(next_arg, assigned);')
    self.tconn.commit()
    self.tconn.isolation_level = 'EXCLUSIVE'
    timer.stop('db.connect')
def evaluate_model(dataset, docs, model, model_props, stats,
                   save_output=False, save_scores=False, print_table=False):
    prog = utils.Progbar(dataset.n_batches)
    mt = RankingMetricsTracker(dataset.name, model_props=model_props) \
        if model_props.ranking else ClassificationMetricsTracker(dataset.name)
    mta = ClassificationMetricsTracker(dataset.name + " anaphoricity", anaphoricity=True)

    docs_by_id = {doc.did: doc for doc in docs} if model_props.ranking else {}
    saved_links, saved_scores = (defaultdict(list) if save_output else None,
                                 defaultdict(dict) if save_scores else None)
    for i, X in enumerate(dataset):
        if X['y'].size == 0:
            continue
        progress = []
        scores = model.predict_on_batch(X)
        if model_props.ranking:
            update_doc(docs_by_id[X['did']], X, scores,
                       saved_links=saved_links, saved_scores=saved_scores)
        if model_props.anaphoricity and not model_props.ranking:
            progress.append(("anaphoricity loss", mta.update(X, scores[0][:, 0])))
        if not model_props.anaphoricity_only:
            progress.append(("loss", mt.update(
                X, scores if model_props.ranking else
                scores[1 if model_props.anaphoricity else 0][:, 0])))
        prog.update(i + 1, exact=progress)

    if save_scores:
        print "Writing scores"
        utils.write_pickle(saved_scores, model_props.path + dataset.name + '_scores.pkl')
    if save_output:
        print "Writing output"
        utils.write_pickle(saved_links, model_props.path + dataset.name + '_links.pkl')
        utils.write_pickle(docs, model_props.path + dataset.name + '_processed_docs.pkl')

    timer.start("metrics")
    if model_props.ranking:
        stats.update(compute_metrics(docs, dataset.name))
    stats["validate time"] = time.time() - prog.start
    if model_props.anaphoricity and not model_props.ranking:
        mta.finish(stats)
    if not model_props.anaphoricity_only:
        mt.finish(stats)
    timer.stop("metrics")

    if print_table:
        print " & ".join(map(lambda x: "{:.2f}".format(x * 100), [
            stats[dataset.name + " muc precision"], stats[dataset.name + " muc recall"],
            stats[dataset.name + " muc"],
            stats[dataset.name + " b3 precision"], stats[dataset.name + " b3 recall"],
            stats[dataset.name + " b3"],
            stats[dataset.name + " ceafe precision"], stats[dataset.name + " ceafe recall"],
            stats[dataset.name + " ceafe"],
            stats[dataset.name + " conll"]]))
def space(event):
    if event.char != " ":
        return
    if timer.run == False:
        timer.run = True
        time.sleep(0)
    else:
        timer.stop()
        s.Gen()
def train(self):
    timer.start("train")
    X = self.memory.pop(int(random.random() * len(self.memory)))
    self.train_on_example(X)
    self.size -= 1
    timer.stop("train")
    if self.trainer.n == 1:
        print "Start training!"
        print
def load(self, data, pair_model, anaphoricity_model):
    timer.start("pair model")
    pair_features, self.pair_ids = data.vectorize_pairs(self.did, self.possible_pairs)
    self.pair_vectors = run_static_model(pair_features, pair_model)
    timer.stop("pair model")

    timer.start("anaphoricity model")
    mention_features, self.mention_ids = data.vectorize_mentions(self.did, self.mentions)
    self.mention_vectors = run_static_model(mention_features, anaphoricity_model)
    timer.stop("anaphoricity model")
def dump(self, key, pathname):
    timer.start('db.dump')
    item = self.find_one(key)
    if not item:
        print 'Not ready yet: %s' % key
    else:
        with open(pathname, 'w') as f:
            item.stream_content(f)
    timer.stop('db.dump')
def insert(self, item):
    timer.start('db.insert')
    action_taken = 'exists'
    if item.type == 'temp':
        # This is an intermediate file, use the cache
        conn, file_dir, log = (self.cconn, glob.cache_file_directory, clog)
        if not self.exists_temp_dbid(item.dbid):
            action_taken = 'saved'
    else:
        # This is not intermediate data, don't use cache
        conn, file_dir, log = (self.dconn, glob.data_file_directory, dlog)
        if not self.exists_data(item):
            action_taken = 'saved'
    if action_taken == 'saved':
        if item.path:
            new_pathname = file_dir + item.cbid
            if not os.path.isfile(new_pathname):
                shutil.move(item.path, new_pathname)
            item.path = item.cbid
        ins, vals = item.sqlite3_insert()
        #print ins
        #print vals
        while True:
            try:
                curs = conn.cursor()
                curs.execute(ins, vals)
                conn.commit()
            except sqlite3.OperationalError:
                print 'Database (todos) is locked on insert_exec'
                time.sleep(1)
                continue
            break
        self.task_prep(item)
    # conn/file_dir/log are bound above for both the 'saved' and 'exists' cases,
    # so the logging below is valid even when nothing was written.
    if item.type == 'temp':
        log.info('%s (%s) %s' % (item.cbid, item.dbid, action_taken))
    else:
        log.info('%s %s' % (item.cbid, action_taken))
    timer.stop('db.insert')
    #if glob.total_quota:
    #    self.keep_quota()
    if action_taken == 'saved':
        return True
    else:
        return False
def action_costs(self):
    timer.start("costs")
    costs = []
    for ant in self.candidate_antecedents:
        hypothetical_score = self.doc.link(ant, self.current_mention, hypothetical=True)
        costs.append(hypothetical_score)
    costs.append(self.get_f1())
    timer.stop("costs")
    # Turn the per-action F1 scores into non-negative regrets relative to the best
    # available action, scaled by the size of the document.
    costs = np.array(costs, dtype='float')
    costs -= costs.max()
    costs *= (len(self.doc.mention_to_gold) + len(self.doc.mentions)) / 100.0
    return -costs[:, np.newaxis]
def stop(ID):
    """
    Stops an animation if it is paused or running.

    :param string ID: ID of the object in ui.objects
    """
    if animations[ID]["status"] != 0:
        timer.stop(ID + "timer")
        animations[ID]["step"] = 0
        animations[ID]["status"] = 0
    else:
        print("Animation warning: cannot stop animation", ID + ": animation already stopped")
def hashfile(fname, blocksize=65536):
    timer.start('utils.hashfile')
    key = hashlib.sha1()
    afile = open(fname, 'rb')
    buf = afile.read(blocksize)
    length = len(buf)
    while len(buf) > 0:
        key.update(buf)
        buf = afile.read(blocksize)
        length += len(buf)
    key = key.hexdigest()
    timer.stop('utils.hashfile')
    return key, length
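# A minimal usage sketch of the two hashing helpers above ('input.dat' is only an
# illustrative filename; hashlib and the module-level timer must be available as in
# the definitions):
#
#   cbid = hashstring('hello')
#   # -> 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d' (SHA-1 hex digest)
#   digest, nbytes = hashfile('input.dat')
#   # -> SHA-1 hex digest of the file contents plus its length in bytes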
def find_one(self, key):
    timer.start('db.find_one')
    curs = self.dconn.cursor()
    curs.execute("SELECT * FROM items WHERE cbid=? ORDER BY id DESC LIMIT 1", (key,))
    res = curs.fetchone()
    if not res:
        curs = self.cconn.cursor()
        curs.execute('SELECT * FROM items WHERE cbid=? OR dbid=? ORDER BY id DESC LIMIT 1',
                     (key, key,))
        res = curs.fetchone()
    if res:
        res = Item(res)
    timer.stop('db.find_one')
    return res
def keep_quota(self, quota_bytes):
    timer.start('db.keep_quota')
    consumption = self.quota_status()
    print '%i %i %i' % (time.time(), consumption, quota_bytes)
    if consumption <= quota_bytes:
        pass
        #print 'Under Quota by:', (quota_bytes-consumption)
    else:
        over_consumption = consumption - quota_bytes
        #print 'Over Quota by:', over_consumption
        file_cnt = 0
        while over_consumption > 0:
            try:
                curs = self.cconn.cursor()
                trcurs = self.trconn.cursor()
                curs.execute('SELECT * FROM items ORDER BY id LIMIT 25;')
                res = curs.fetchall()
                timer.stop('db.keep_quota')
                cbids = []
                for r in res:
                    it = Item(r)
                    if it.dbid != '685aa1bae538a9f5dba28a55858467f82f5142a8:0':
                        #shutil.copy( glob.cache_file_directory+it.path, glob.trash_file_directory+it.path )
                        ins, dat = it.sqlite3_insert()
                        trcurs.execute(ins, dat)
                        cbids.append(it.cbid)
                        over_consumption -= it.size
                        if over_consumption <= 0:
                            break
                self.trconn.commit()
                print '# %i files put in the trash' % (len(cbids))
                #timer.start('db.keep_quota')
                for cbid in cbids:
                    pass
                    #print ' -'+cbid
                    curs.execute("DELETE FROM items WHERE cbid=?;", (cbid,))
                self.cconn.commit()
            except sqlite3.OperationalError:
                print 'Database (cache) is locked on keep_quota'
                time.sleep(1)
                continue
            file_cnt += len(cbids)
        return file_cnt
def task_cnt(self):
    calls = []
    timer.start('db.task.count')
    conn, log = (self.tconn, self.tlog)
    with conn:
        try:
            curs = conn.cursor()
            curs.execute('SELECT count(id) as cnt FROM todos WHERE 1')
            res = curs.fetchone()
            timer.stop('db.task.count')
            return int(res['cnt'])
        except sqlite3.OperationalError:
            return 1
def task_remain(self, wfid):
    calls = []
    timer.start('db.task.remain')
    conn, log = (self.tconn, self.tlog)
    with conn:
        try:
            curs = conn.cursor()
            curs.execute('SELECT id FROM todos WHERE wfid = ?', (glob.workflow_id,))
            res = curs.fetchall()
            timer.stop('db.task.remain')
            return len(res)
        except sqlite3.OperationalError:
            return 1
def exists_data(self, item):
    timer.start('db.exists_data')
    while True:
        try:
            curs = self.dconn.cursor()
            curs.execute('SELECT "when" FROM items WHERE cbid=?', (item.cbid,))
            when = curs.fetchone()
            timer.stop('db.exists_data')
            return when
        except sqlite3.OperationalError:
            print 'Database (todos) is locked on exists_data'
            time.sleep(1)
            continue
        break
def find(self, key):
    timer.start('db.find')
    curs = self.dconn.cursor()
    curs.execute('SELECT * FROM items WHERE cbid=?', (key,))
    res = curs.fetchall()
    if len(res) <= 0:
        curs = self.cconn.cursor()
        curs.execute('SELECT * FROM items WHERE cbid=? OR dbid=?', (key, key,))
        res = curs.fetchall()
    returns = []
    for r in res:
        returns.append(Item(r))
    timer.stop('db.find')
    return returns
def train_all(self):
    timer.start("train")
    model_weights = self.model.get_weights()
    prog = utils.Progbar(len(self.memory))
    random.shuffle(self.memory)
    for i, X in enumerate(self.memory):
        loss = self.train_on_example(X)
        prog.update(i + 1, [("loss", loss)])
    self.size = 0
    self.memory = []
    timer.stop("train")
    weight_diffs = [
        (np.sum(np.abs(new_weight - old_weight)), new_weight.size)
        for new_weight, old_weight in zip(self.model.get_weights(), model_weights)]
    summed = np.sum(map(np.array, weight_diffs), axis=0)
    print "weight diffs", weight_diffs, summed
def task_claim(self, count=1):
    batch = uuid()
    timer.start('db.task.claim')
    while True:
        try:
            conn, log = (self.tconn, self.tlog)
            with conn:
                curs = conn.cursor()
                if glob.wq_stage:
                    curs.execute('SELECT cbid FROM todos WHERE next_arg IS NULL AND assigned IS NULL AND step = ? LIMIT ?',
                                 (glob.wq_stage, count))
                else:
                    curs.execute('SELECT cbid FROM todos WHERE next_arg IS NULL AND assigned IS NULL ORDER BY id LIMIT ?',
                                 (count,))
                res = curs.fetchall()
                cbids = []
                for r in res:
                    cbids.append(r['cbid'])
                if len(cbids) > 0:
                    upd_str = 'UPDATE todos SET assigned = ? WHERE next_arg IS NULL AND assigned IS NULL AND cbid IN (%s);' % ', '.join('?' for c in cbids)
                    #print upd_str
                    #print [batch]+cbids
                    curs.execute(upd_str, [batch] + cbids)
                    conn.commit()
                    #log.info('%i tasks assigned' % ( len(res) ))
                    timer.stop('db.task.claim')
                    return batch
        except sqlite3.OperationalError:
            print traceback.format_exc()
            print 'Database (todos) is locked on task_claim'
            time.sleep(0.95)
            continue
        break
    #timer.stop('db.task.claim')
    return None
def make_requests(uuids, folder):
    import json, timer
    timer = timer.timer()
    timer.start()
    requests = [grequests.get(fmt.format(uuid)) for uuid in uuids]
    responses = grequests.map(requests)
    timer.print(current_indent + "time to do get requests: ")
    written = 0
    total = len(uuids)
    exception_uuids = set()
    for uuid, response in zip(uuids, responses):
        try:
            data = response.json()['data']
            death_date = to_string(from_string(data['death_date']))
            path = folder + "/" + death_date + "-" + uuid + ".json"
            # write BOM
            f = open(path, "wb")
            f.write(codecs.BOM_UTF8)
            f.close()
            # write the actual data
            f = open(path, "ab")
            f.write(json.dumps(data, ensure_ascii=False).encode("utf-8").decode().encode("utf-8"))
            f.close()
            written += 1
        except Exception as e:
            print(current_indent + "exception occurred on current batch.")
            print(current_indent + "uuid causing the exception: " + uuid)
            print(current_indent + str(e))
            exception_uuids.add(uuid)
    timer.stop()
    timer.print(current_indent + "total time for current batch: ")
    return written, exception_uuids
def task_add(self, call):
    timer.start('db.task.add')
    while True:
        try:
            conn, log = (self.tconn, self.tlog)
            with conn:
                curs = conn.cursor()
                # Check whether the output files already exist
                outputs_exist = True
                for i in range(0, len(call.body['returns'])):
                    dbid = call.cbid + ':' + str(i)
                    if not glob.db.exists_temp_dbid(dbid):
                        outputs_exist = False
                        break
                if not outputs_exist:
                    # Check whether the task is already queued up
                    curs.execute('SELECT cbid FROM todos WHERE cbid=?', (call.cbid,))
                    res = curs.fetchone()
                    if not res:
                        # Find the first needed argument that is not already available
                        next_arg = None
                        if 'args' in call.body and len(call.body['args']) > 0:
                            for arg in call.body['args']:
                                if not glob.db.find(arg):
                                    next_arg = arg
                                    break
                        ins = 'INSERT INTO todos (cbid, step, priority, next_arg) VALUES (?,?,0,?);'
                        curs.execute(ins, (call.cbid, call.step, next_arg))
                        conn.commit()
                        log.info('%s added' % (call.cbid))
        except sqlite3.OperationalError:
            print 'Database (todos) is locked on task_add'
            time.sleep(1)
            continue
        break
    timer.stop('db.task.add')
def run(self):
    print 'Allocating %i local workers.' % glob.exec_local_concurrency
    self.workers = []
    while not glob.shutting_down:
        timer.start('work')
        finished_worker_cnt = self.finish_workers()
        started_worker_cnt = self.start_workers()
        if started_worker_cnt < 0:
            break
        timer.stop('work')
        if finished_worker_cnt == 0 and started_worker_cnt == 0:
            time.sleep(1)
def get_model(train, vectors, model_props):
    graph = build_graph(train, vectors, model_props)
    opt = model_props.get_optimizer()

    timer.start("compile")
    loss = {}
    if model_props.ranking:
        loss['y'] = get_sum(train.scale_factor * (0.1 if model_props.reinforce else 1))
    else:
        if not model_props.anaphoricity_only:
            loss['y'] = get_summed_cross_entropy(train.scale_factor)
        if model_props.anaphoricity:
            loss['anaphoricities'] = get_summed_cross_entropy(train.anaphoricity_scale_factor)
    graph.compile(loss=loss, optimizer=opt)
    timer.stop("compile")

    if model_props.load_weights_from is not None:
        set_weights(graph, model_props.load_weights_from, model_props.weights_file)
    return graph, opt
def task_fail(self, call):
    timer.start('db.task.fail')
    while True:
        try:
            conn, log = (self.tconn, self.tlog)
            with conn:
                curs = conn.cursor()
                upd = 'UPDATE todos SET assigned=? WHERE cbid=?;'
                curs.execute(upd, ('failed', call.cbid))
                conn.commit()
        except sqlite3.OperationalError:
            print 'Database (todos) is locked on task_fail'
            time.sleep(1)
            continue
        break
    timer.stop('db.task.fail')