def get(self, wiki_id, brand):
    '''
    :param wiki_id: string
    :param brand: string name of brand
    '''
    page_doc_response = ListDocIdsService().get(wiki_id)
    if page_doc_response['status'] != 200:
        return page_doc_response

    if USE_MULTIPROCESSING:
        m = Manager()
        d = m.list()
        l = m.list()
        done_ids = dict([(line.split(',')[0], True) for line in open('data_output.json', 'r')])
        print len(done_ids)
        doc_ids = filter(lambda x: not done_ids.get(x, False), page_doc_response[wiki_id])
        # sents = [a for b in Pool(processes=MP_NUM_CORES).map(
        #     add_brand_sent_sentiment, [(d, l, i, brand) for i in doc_ids]) for a in b]
        Pool(processes=MP_NUM_CORES).map(add_brand_sent_sentiment,
                                         [(d, l, i, brand) for i in doc_ids])
        # NOTE: this branch collects results in the manager lists d and l and
        # never populates sents, so the return below only covers the else branch.
    else:
        ses = SentencesForEntityService()
        total = len(page_doc_response[wiki_id])
        counter = 0
        sents = []
        for doc_id in page_doc_response[wiki_id]:
            resp = ses.get(doc_id, brand)
            # append a (key, value) pair so dict(sents) below works
            sents.append((doc_id, resp.get(brand, [])))
            counter += 1
            print "%d / %d" % (counter, total)
    return {'status': 200, brand: dict(sents)}
def scanner_network(self, gateway):
    scan = ''
    config_gateway = gateway.split('.')
    del config_gateway[-1]
    for i in config_gateway:
        scan += str(i) + '.'
    gateway = scan
    ranger = str(self.ip_range.text()).split('-')
    jobs = []
    manager = Manager()
    on_ips = manager.dict()
    for n in xrange(int(ranger[0]), int(ranger[1])):
        ip = '%s{0}'.format(n) % (gateway)
        p = Process(target=self.working, args=(ip, on_ips))
        jobs.append(p)
        p.start()
    for i in jobs:
        i.join()
    for i in on_ips.values():
        Headers = []
        n = i.split('|')
        self.data['IPaddress'].append(n[0])
        self.data['MacAddress'].append(n[1])
        self.data['Hostname'].append('<unknown>')
        for n, key in enumerate(reversed(self.data.keys())):
            Headers.append(key)
            for m, item in enumerate(self.data[key]):
                item = QTableWidgetItem(item)
                item.setTextAlignment(Qt.AlignVCenter | Qt.AlignCenter)
                self.tables.setItem(m, n, item)
    Headers = []
    for key in reversed(self.data.keys()):
        Headers.append(key)
    self.tables.setHorizontalHeaderLabels(Headers)
def getData():
    if os.path.isfile("chat_urls.p"):
        chat_urls = pickle.load(open("chat_urls.p", "rb"))
    else:
        chat_urls = {}
        for user in users:
            chat_urls[user] = get_urls(user)
        teams_url = "http://espn.go.com/mlb/teams"
        pickle.dump(chat_urls, open("chat_urls.p", "wb"))

    # for user in chat_urls:
    #     urls = chat_urls[user]
    #     for url in urls:
    #         getLog(url)

    logDB = {}
    for user in chat_urls:
        logDB[user] = {}
    p = Pool(20)
    i = 0
    manager = Manager()
    db = manager.dict()
    for user in chat_urls:
        for url in chat_urls[user]:
            i += 1
            p.apply_async(addLogData, args=(url, db))
    p.close()
    p.join()
    out = db._getvalue()
    outfile = open("rawChat.txt", "wb")
    for url in out:
        outfile.write(out[url] + "\n")
def controller_failure_unit_test():
    s = ["1001"]
    s1 = ["1002"]
    clear_config(s)
    clear_config(s1)
    manager1 = Manager()
    manager2 = Manager()
    failure1 = manager1.Value('i', 0)
    failed_list1 = manager1.list([])
    failure2 = manager2.Value('i', 0)
    failed_list2 = manager2.list([])
    processes = []
    process2 = mp.Process(target=controller_failure_detection,
                          args=(s, '1', failure1, failed_list1,))
    processes.append(process2)
    process4 = mp.Process(target=controller_failure_detection,
                          args=(s, '2', failure2, failed_list2,))
    processes.append(process4)
    for p in processes:
        p.start()
        print 'STARTING:', p, p.is_alive()
    r = random.randint(1, 10)
    time.sleep(r)
    print 'terminated'
    t1 = time.time()
    logging.debug(str(["controller failed at:"] + [t1]))
    processes[0].terminate()
    # Exit the completed processes
    for p in processes:
        p.join()
        print 'JOINED:', p, p.is_alive()
def processFiles(patch_dir):
    root = os.getcwd()
    glbl.data_dirs = {}
    if root != patch_dir:
        working_path = root + "/" + patch_dir
    else:
        working_path = root

    for path, dirs, files in os.walk(working_path):
        if len(dirs) == 0:
            glbl.data_dirs[path] = ''

    # Multiprocessing Section
    #########################################
    Qids = glbl.data_dirs.keys()
    manager = Manager()   # creates shared memory manager object
    results = manager.dict()   # Add dictionary to manager, so it can be accessed across processes
    nextid = Queue()   # Create Queue object to serve as shared id generator across processes
    for qid in Qids:
        nextid.put(qid)   # Load the ids to be tested into the Queue
    for x in range(0, multiprocessing.cpu_count()):   # Create one process per logical CPU
        # Assign process to processData function, passing in the Queue and shared dictionary
        p = Process(target=processData, args=(nextid, results))
        glbl.jobs.append(p)   # Add the process to a list of running processes
        p.start()   # Start process running
    for j in glbl.jobs:
        j.join()   # For each process, join them back to main, blocking on each one until finished

    # write out results
    c = 1
    sets = results.keys()
    sets.sort()
    for x in sets:
        if results[x] != 'None':
            FINAL = open('result' + str(c) + '.txt', 'w')
            n = "\n************************************************************************************************\n"
            FINAL.write(n + "* " + x + ' *\n' + n + results[x] + "\n")
            FINAL.close()
            c += 1
def __init__(self, firefox=None, email=None, senha=None, pasta=None):
    """'firefox' is the path to the Firefox binary to use.
    'pasta' is the path to the folder where downloads are saved."""
    self.firefox = firefox
    self.pasta = pasta
    self.email = email
    self.senha = senha
    self.navegador = None
    self.app = None
    self.logger = None
    manager = Manager()
    self.safe_dict = manager.dict()
    self.clear_captcha()
    self.stop()
    self.try_break_audio_captcha = True
    self.nome_audio_captcha = "somCaptcha.wav"
    self.recognizer = sr.Recognizer(str('pt-BR'))
    self.user_agent = (
        "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:28.0)"
        " Gecko/20100101 Firefox/28.0"
    )
    self.base_url = 'http://esic.prefeitura.sp.gov.br'
    self.login_url = self.base_url + '/Account/Login.aspx'
    self.logado = False
    self.ja_tentou_cookies_salvos = False
    self.rodar_apenas_uma_vez = False
def correction_terms_threaded(self):
    '''Finds the correction terms associated to the quadratic form,
    for each of the equivalence classes it finds the maximum by
    iterating through the relation vectors of the group.

    Uses multiprocessing.'''
    print 'Using multiprocessing'
    pool = Pool()  # default: processes=None => uses cpu_count()
    manager = Manager()
    start_time = time.time()
    coef_lists = lrange(self.group.structure)
    # representatives = elements of C_1(V) (np.matrix)
    representatives = map(lambda l: self.find_rep(l), coef_lists)
    # list of maxes
    lst = manager.list([None for i in xrange(len(representatives))])
    alphalist = list(self.get_alpha())  # cannot pickle generators
    pool.map_async(functools.partial(process_alpha_outside, self, representatives, lst),
                   alphalist)
    pool.close()
    pool.join()  # wait for pool to finish
    # get corrterms via (|alpha|^2+b)/4
    print 'Computed from quadratic form in %g seconds' \
        % (time.time() - start_time)
    return [Fraction(Fraction(alpha, self.int_inverse[1]) + self.b, 4) \
            for alpha in lst]
class MemStorage:
    def __init__(self, config):
        self.config = config
        self.measures = Manager().list()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def save(self, measure):
        self.measures.append(measure)

    def last(self):
        if len(self.measures) <= 0:
            return None
        return self.measures[-1]

    def __str__(self):
        buf = "<{} measures: [".format(self.__class__)
        for item in self.measures:
            buf += "'{}'".format(item)
        buf += "]>"
        return buf
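
# Minimal usage sketch for MemStorage (an assumption, not from the original source):
# because measures is a Manager().list() proxy, values appended in worker processes
# are visible to the parent. The worker function and the empty config are placeholders.
from multiprocessing import Process

def record_measure(measures, value):
    measures.append(value)  # the proxy call is forwarded to the manager process

if __name__ == "__main__":
    with MemStorage(config={}) as storage:
        workers = [Process(target=record_measure, args=(storage.measures, i))
                   for i in range(4)]
        for w in workers:
            w.start()
        for w in workers:
            w.join()
        print(storage.last())  # one of the values appended by the workers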
def main():
    init_params()
    vk = connect_to_vk(LOGIN, PASSWORD)
    audio_list = vk.method('audio.get', {})
    total = len(audio_list)
    if not os.path.exists(DOWNLOAD_DIR):
        os.makedirs(DOWNLOAD_DIR)
    manager = Manager()
    workers_list = []
    progress_list = manager.dict()
    downloaded_tracks = manager.Value('i', 0)
    lock = Lock()

    for f in audio_list[:WORKERS_COUNT - 1]:
        start_download_process(f, workers_list, progress_list, downloaded_tracks, lock)
    del audio_list[:WORKERS_COUNT - 1]

    while any(worker.is_alive() for worker in workers_list) or len(audio_list):
        if audio_list and len(workers_list) < WORKERS_COUNT:
            f = audio_list.pop(0)
            start_download_process(f, workers_list, progress_list, downloaded_tracks, lock)
        print_progress(progress_list, downloaded_tracks.value, total)
        clean_workers(workers_list)
        time.sleep(0.1)
    print "Done!"
def multiupload(self, filename, hosts):
    """Upload file to multiple hosts simultaneously

    The upload will be attempted for each host until the optimal file
    redundancy is achieved (a percentage of successful uploads) or the
    host list is depleted.

    Args:
        filename (str): The filename of the file to upload.
        hosts (list): A list of hosts as defined in the master host list.

    Returns:
        A list of dicts with 'host_name' and 'url' keys for all
        successful uploads or an empty list if all uploads failed.
    """
    manager = Manager()
    successful_uploads = manager.list([])

    def f(host):
        if len(successful_uploads) / float(len(hosts)) < settings.MIN_FILE_REDUNDANCY:
            # Optimal redundancy not achieved, keep going
            result = self.upload_to_host(filename, host)
            if 'error' in result:
                self._host_errors[host] += 1
            else:
                successful_uploads.append(result)

    multiprocessing.dummy.Pool(len(hosts)).map(f, self._hosts_by_success(hosts))
    return list(successful_uploads)
def download(self, sources, output_directory, filename):
    """Download a file from one of the provided sources

    The sources will be ordered by least amount of errors, so most
    successful hosts will be tried first. In case of failure, the next
    source will be attempted, until the first successful download is
    completed or all sources have been depleted.

    Args:
        sources: A list of dicts with 'host_name' and 'url' keys.
        output_directory (str): Directory to save the downloaded file in.
        filename (str): Filename assigned to the downloaded file.

    Returns:
        A dict with 'host_name' and 'filename' keys if the download is
        successful, or an empty dict otherwise.
    """
    valid_sources = self._filter_sources(sources)
    if not valid_sources:
        return {'error': 'no valid sources'}

    manager = Manager()
    successful_downloads = manager.list([])

    def f(source):
        if not successful_downloads:
            result = self.download_from_host(source, output_directory, filename)
            if 'error' in result:
                self._host_errors[source['host_name']] += 1
            else:
                successful_downloads.append(result)

    multiprocessing.dummy.Pool(len(valid_sources)).map(f, valid_sources)
    return successful_downloads[0] if successful_downloads else {}
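
# Hypothetical call sketch (not from the original source): `downloader` stands for an
# instance of the class these methods belong to, and the host names/URLs are placeholders.
# download() returns the first successful result because the thread workers stop trying
# once successful_downloads (the shared Manager list) is non-empty.
result = downloader.download(
    sources=[{"host_name": "hostA", "url": "https://example.com/f.bin"},
             {"host_name": "hostB", "url": "https://example.org/f.bin"}],
    output_directory="/tmp",
    filename="f.bin",
)
if result and "error" not in result:
    print("served by {}".format(result["host_name"]))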
def concurrent_test(robot, rooms, num_trials, start_location=-1, chromosome=None):
    """
    Run the tests in multiple processes.
    Can be directly swapped out for testAllMaps.
    """
    # Setup variables
    num_rooms = len(rooms)                 # Total number of rooms
    total_trials = num_trials * num_rooms  # Total number of trials
    processes = []                         # List for all processes
    manager = Manager()                    # Manager to handle result transfer
    dict = manager.dict()                  # Dict which will store results

    # Create a process for each room, storing parameters in instance variables
    for i, room in enumerate(rooms):
        process = SimulationProcess(i, dict)
        process.robot = robot
        process.room = room
        process.num_trials = num_trials
        process.start_location = start_location
        process.chromosome = chromosome
        process.start()
        processes.append(process)
    # end for

    # Print the results
    total_score = 0
    for i, process in enumerate(processes):
        process.join()
        (score, std) = dict[i]
        print("Room %d of %d done (score: %d std: %d)" % (i + 1, num_rooms, score, std))
        total_score += score
    # end for

    print("Average score over %d trials: %d" % (total_trials, total_score / num_rooms))
    return total_score / num_rooms
# end concurrent_test
def run(args):
    # Limit it to a single GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    conn = create_db(args.db)
    m = Manager()
    logs = args.logging
    datasets = args.datasets
    embeddings = args.embeddings
    settings = args.settings
    # So we don't litter the fs
    dir_ = tempfile.mkdtemp(prefix='baseline-speed-test-')
    try:
        configs = get_configs(args.config)
        if not args.single:
            full_configs = []
            for config in configs:
                full_configs.extend(edit_config(config, args.frameworks, args.no_crf, args.no_attn))
            configs = full_configs
        if args.verbose:
            for config in configs:
                pprint(config)
                print()
            print()
        steps = len(configs)
        pg = create_progress_bar(steps)
        for config in configs:
            write_config = deepcopy(config)
            config['train']['epochs'] = args.trials
            task_name = config['task']
            system_info = m.dict()
            p = Process(
                target=run_model,
                args=(
                    system_info,
                    config,
                    logs,
                    settings,
                    datasets,
                    embeddings,
                    task_name,
                    dir_,
                    int(args.gpu)
                )
            )
            p.start()
            pid = p.pid
            p.join()
            log_file = os.path.join(dir_, 'timing-{}.log'.format(pid))
            speeds = parse_logs(log_file)
            save_data(conn, speeds, write_config, system_info)
            pg.update()
        pg.done()
    finally:
        shutil.rmtree(dir_)
def run_multiprocesses_likelihood(self):
    lik = 0.0
    workers = []
    workers_no = self.configuration.num_threads
    corpusSplitlist = self.split_average_data(workers_no)

    likmanager = Manager()
    ManagerReturn_corpusSplitlist = []
    ManagerReturn_corpusSplitlist_lik = []
    for dataSplit in corpusSplitlist:
        likreturn_dataSplit = likmanager.list()
        likreturn_dataSplit_likvalue = likmanager.Value("", 0.0)
        worker = Process(target=self.splitlikelihood,
                         args=(dataSplit, likreturn_dataSplit, likreturn_dataSplit_likvalue))
        worker.start()
        workers.append(worker)
        ManagerReturn_corpusSplitlist.append(likreturn_dataSplit)
        ManagerReturn_corpusSplitlist_lik.append(likreturn_dataSplit_likvalue)
    for w in workers:
        w.join()

    # compute all the likelihood for the splits:
    for v in ManagerReturn_corpusSplitlist_lik:
        lik += v.value

    # update all the docs into corpus, since we compute the doc distribution in likelihood()
    self.corpus.clear()
    for dataSplit in ManagerReturn_corpusSplitlist:
        for doc in dataSplit:
            self.corpus.append(doc)

    return lik
def __init__(self, port):
    manager = Manager()
    self.status = manager.dict()
    self.sendbuf = manager.list()
    self.p = Process(target=SocketManager, args=(port, self.status, self.sendbuf))
    self.p.daemon = True
    self.p.start()
def record_metrics(self, input_file):
    mgr = Manager()
    metrics_data = mgr.list()
    procs = []
    for i in range(self.metrics):
        p = Process(target=self.record_metric, args=(input_file, metrics_data))
        p.start()
        procs.append(p)
        if len(procs) >= self.procs:
            for p in procs:
                p.join()
            procs = []
    for p in procs:
        p.join()

    l = set()
    for metric in metrics_data:
        if self.non_uniques:
            l.add(metric.bbs)
        else:
            l.add(metric.unique_bbs)

    self.stats["min"] = min(l)
    self.stats["max"] = max(l)
    self.stats["avg"] = reduce(lambda x, y: x + y, l) / float(len(l))
    self.original_stats = dict(self.stats)
    self.print_statistics()
def func_thread():
    a = numpy.random.rand(1000000)
    b = numpy.random.rand(1000000)
    nodata = 0.3
    print "here"
    manager = Manager()
    lock = Lock()
    d = manager.dict()
    ps = []
    start_time = time.clock()
    for i in numpy.where((a > 0.7) & (a < 0.9) & (a != nodata)):
        for j in numpy.where((b > 0.5) & (b < 0.9) & (b != nodata)):
            index = numpy.intersect1d(i, j)
            length = len(index) / 2
            array1 = index[:length]
            array2 = index[length:]
            for processes in range(2):
                p = Process(target=f_thread, args=(d, a, b, array1, lock))
                ps.append(p)
                p.start()
    for p in ps:
        p.join()
    print time.clock() - start_time, "seconds"
    print len(d)
def main():
    if len(sys.argv) > 1:
        print "cmd arg to set directory to: " + sys.argv[1]
        os.chdir(sys.argv[1])
    print "cwd is: " + os.getcwd()

    # make sure we have the correct device
    keepTrying = True
    countCurrent = 0
    countCurrentFail = 0
    manager = Manager()
    sharedDictionary = manager.dict()
    while keepTrying:
        serial0 = serial.Serial("/dev/ttyACM0")  # connection to arduino1
        serial1 = serial.Serial("/dev/ttyACM1")  # connection to arduino2
        try:
            # read arduino about once every two seconds
            # (the original referenced an undefined `ser`; serial0 is the probed device)
            line = serial0.readline()
            I = float(line.split(" ")[1].strip())  # get the current reading
            countCurrent += 1
        except Exception, e:
            countCurrentFail += 1
        if countCurrent > countCurrentFail + 5:  # 5 good readings
            keepTrying = False
            startThreading(sharedDictionary, serial0, serial1)
        elif countCurrentFail > countCurrent + 5:  # 5 bad readings, do a swap
            keepTrying = False
            startThreading(sharedDictionary, serial1, serial0)
        print " . " + str(countCurrent) + "-" + str(countCurrentFail)
def timeout_iterator(iterator):
    """Wraps an iterator and makes it timeout after time ``timeout``.

    Parameters
    ----------
    iterator : iterator

    Returns
    -------
    timeout_iterator : iterator
    """
    buffer_ = Manager().Queue()
    process = Process(
        target=partial(map),
        args=(buffer_.put, iterator)
    )
    process.start()
    process.join(timeout)
    process.terminate()

    buffer_.put(QueueStop())
    timeout_iterator = iter(buffer_.get, QueueStop())
    return timeout_iterator
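
# Hedged usage sketch (not from the original source). Assumptions: the module-level
# `timeout` (seconds) and QueueStop sentinel that timeout_iterator expects are defined,
# and the code runs under Python 2 with the fork start method, where the eager built-in
# map used as the Process target actually drains the wrapped iterator. slow_numbers()
# is a hypothetical slow producer.
import itertools
import time

timeout = 2  # module-level timeout read by timeout_iterator

def slow_numbers():
    for n in itertools.count():
        time.sleep(0.5)
        yield n

for value in timeout_iterator(slow_numbers()):
    print(value)  # only the items produced before the timeout are yielded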
class LockingSession(object):
    def __init__(self, dataman, session_filename):
        self.dataman = dataman
        self.session_filename = session_filename
        self.lock = Manager().Lock()

    def acquire(self):
        self.lock.acquire()
        self.session = DataManager.shelf(self.session_filename)

    def release(self):
        self.session.close()
        self.session = None
        self.lock.release()

    def __getitem__(self, item):
        self.acquire()
        ret = self.session[item]
        self.release()
        return ret

    def __setitem__(self, item, value):
        self.acquire()
        self.session[item] = value
        self.release()
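
# Hedged usage sketch (not from the original source): DataManager.shelf is assumed to
# open a shelve-like mapping stored at session_filename; the dataman argument and the
# keys below are placeholders. Each item access acquires the manager-backed lock, opens
# the shelf, and releases it again, so concurrent users of the same session file are
# serialized.
session = LockingSession(dataman=None, session_filename="session.db")
session["last_run"] = "2020-01-01"  # acquire -> write -> release
print(session["last_run"])          # acquire -> read -> release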
def sync():
    from multiprocessing import Manager
    from common import bounty, settings, peers
    from common.safeprint import safeprint
    man = Manager()
    items = {'config': man.dict(),
             'peerList': man.list(),
             'bountyList': man.list(),
             'bountyLock': bounty.bountyLock,
             'keyList': man.list()}
    items['config'].update(settings.config)
    items['peerList'].extend(peers.peerlist)
    items['bountyList'].extend(bounty.bountyList)
    safeprint(items)
    safeprint(items.get('bountyList'))
    safeprint(items.get('keyList'))
    if items.get('config') is not None:
        from common import settings
        settings.config = items.get('config')
    if items.get('peerList') is not None:
        global peerList
        peers.peerlist = items.get('peerList')
    if items.get('bountyList') is not None:
        from common import bounty
        bounty.bountyList = items.get('bountyList')
    if items.get('bountyLock') is not None:
        from common import bounty
        bounty.bountyLock = items.get('bountyLock')
    return items
def pricing(dual):
    '''Process for getting new columns.'''
    cpus = cpu_count() - int(argv[2])
    final = pow(2, K)
    if K < 23:
        section = final
    else:
        section = 100 * cpus  # try different values
    to = 0
    since = 1
    manager = Manager()
    elements = manager.list([RETAILERS, DCS, PLANTS])
    out = manager.Queue()  # queue with the result from each worker
    while to < final:
        p = Pool(cpus)
        to = min(since + section, final)
        boss = p.apply_async(coordinator, (out,))
        workers = [p.apply_async(work, (k, elements, dual, out)) for k in xrange(since, to)]
        enviados = 0
        for w in workers:
            enviados += w.get()
        out.put('ok')
        a = boss.get()
        assert a.counter == enviados
        since = to + 1
        p.terminate()
    return a
def solve(iterations, proc_count):
    # note: the original body referenced an undefined `process_count`;
    # the `proc_count` parameter is used consistently here
    queue = JoinableQueue()
    partition = get_iterations_partition(iterations, proc_count)
    for iteration in partition:
        queue.put(iteration)
    for i in range(proc_count):
        queue.put(None)

    manager = Manager()
    result = manager.list()
    processes = []
    cur_time = time.time()
    for i in range(proc_count):
        proc = Process(target=worker, args=(queue, result,))
        proc.start()
        processes.append(proc)
    queue.join()
    for proc in processes:
        proc.join()

    cur_time = time.time() - cur_time
    print_results(cur_time, result, iterations)
def spawn(self, n=GAME_CT):
    # Fallback on import error or single core
    try:
        from multiprocessing import Process, Manager, cpu_count
    except ImportError:
        return self.run(n)
    # For low n multiprocessing does not gain much speed up
    if cpu_count() <= 1 or n < 500:
        return self.run(n)

    m = Manager()
    self.results = m.list()
    procs = []
    load = [n // cpu_count()] * cpu_count()
    # add the rest from division to last cpu
    load[-1] += n % cpu_count()

    for count in load:
        proc = Process(target=self.run, args=(count,))
        proc.start()
        procs.append(proc)
    [p.join() for p in procs]
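
# Sketch (an assumption, not from the source) of the run() counterpart that spawn()
# targets: each worker appends its per-game outcomes to self.results, which spawn()
# replaced with a Manager().list() so results written in child processes are visible
# to the parent after join(). play_one_game() is a hypothetical helper.
def run(self, n):
    for _ in range(n):
        self.results.append(self.play_one_game())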
def run():
    # build the mdp
    start = time.time()
    room_size = 3
    num_rooms = 5
    mdp = maze_mdp.MazeMDP(room_size=room_size, num_rooms=num_rooms)

    # build the agent
    m = Manager()
    init_dict = {(s, a): 0 for s in mdp.states for a in mdp.ACTIONS + [None]}
    shared_weights = m.dict(init_dict)
    shared_value_weights = m.dict(init_dict)
    agent = async_actor_critic.AsyncActorCritic(actions=mdp.ACTIONS, discount=mdp.DISCOUNT,
                                                weights=shared_weights,
                                                value_weights=shared_value_weights,
                                                tau=.3, learning_rate=.5)

    # build a single experiment
    rewards = m.list()
    start_state_values = m.list()
    max_steps = (2 * room_size * num_rooms) ** 2
    exp = experiment.Experiment(mdp=mdp, agent=agent, num_episodes=800,
                                max_steps=max_steps, rewards=rewards,
                                start_state_values=start_state_values)

    # run the experiment
    multiexperiment = experiment.MultiProcessExperiment(experiment=exp,
                                                        num_agents=NUM_PROCESSES)
    multiexperiment.run()

    # report results
    end = time.time()
    print 'took {} seconds to converge'.format(end - start)
    mdp.print_state_values(shared_value_weights)
    optimal = mdp.EXIT_REWARD + (2 * room_size * num_rooms * mdp.MOVE_REWARD)
    utils.plot_values(rewards, optimal, 'rewards')
    utils.plot_values(start_state_values, optimal, 'start state value')
def scanner_network(self, gateway):
    get_ip = len(gateway) - 1
    gateway = gateway[:get_ip]
    ranger = str(self.ip_range.text()).split("-")
    self.control = True
    jobs = []
    manager = Manager()
    on_ips = manager.dict()
    for n in xrange(int(ranger[0]), int(ranger[1])):
        ip = "%s{0}".format(n) % (gateway)
        p = Process(target=self.working, args=(ip, on_ips))
        jobs.append(p)
        p.start()
    for i in jobs:
        i.join()
    for i in on_ips.values():
        Headers = []
        n = i.split("|")
        self.data['IPaddress'].append(n[0])
        self.data['MacAddress'].append(n[1])
        self.data['Hostname'].append("<unknown>")
        for n, key in enumerate(reversed(self.data.keys())):
            Headers.append(key)
            for m, item in enumerate(self.data[key]):
                item = QTableWidgetItem(item)
                item.setTextAlignment(Qt.AlignVCenter | Qt.AlignCenter)
                self.tables.setItem(m, n, item)
    self.scanner_OFF(False, "txt_status")
    Headers = []
    for key in reversed(self.data.keys()):
        Headers.append(key)
    self.tables.setHorizontalHeaderLabels(Headers)
def __init__(self):
    manager = Manager()
    self.flow_to_state_map = manager.dict()
    self.flow_to_state_map.clear()
    self.trigger = manager.Value('i', 0)
    self.comp = manager.Value('i', 0)  # sequential = 0, parallel = 1
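
# Minimal standalone sketch (an assumption, not from the original source) of how a shared
# flow-to-state map and counter built on Manager proxies like the ones above can be
# updated from worker processes; install_flow_rule and the flow ids are hypothetical.
from multiprocessing import Manager, Process

def install_flow_rule(flow_to_state_map, trigger, flow_id):
    flow_to_state_map[flow_id] = "installed"  # proxy writes go through the manager process
    trigger.value += 1                        # read-modify-write is not atomic; add a lock if contended

if __name__ == "__main__":
    manager = Manager()
    flow_map = manager.dict()
    trigger = manager.Value('i', 0)
    workers = [Process(target=install_flow_rule, args=(flow_map, trigger, fid))
               for fid in ("flow-1", "flow-2")]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(dict(flow_map), trigger.value)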
def multi_download(url_and_name_list, num_threads=8):
    '''accepts list of tuples, where t[0] = url and t[1] = filename'''
    manager = Manager()
    # pylint: disable=no-member
    m_list = manager.list()
    # pylint: enable=no-member
    log = logging.getLogger('multi_dl')
    log.debug('starting pool with ' + str(num_threads) + ' workers')
    monitor_thread = Process(target=download_monitor,
                             args=((m_list, len(url_and_name_list)),))
    monitor_thread.start()
    workers = Pool(processes=num_threads)
    work = workers.map_async(single_download,
                             zip(url_and_name_list, repeat(m_list)))
    # this hack makes the async map respond to ^C interrupts
    try:
        work.get(0xFFFF)
        monitor_thread.join()
        sys.stdout.write('\n\n')
    except KeyboardInterrupt:
        print 'parent received control-c'
        exit()
def crackTicket(ticket, label, hashList):
    try:
        data = base64.b64decode(ticket)
    except:
        # print "DEBUG\n" + str(ticket) + "DEBUG\n\n"
        return "FAIL" + str(label) + "\n"

    manager = Manager()
    enctickets = manager.list()

    if data[0] == '\x76':
        try:
            enctickets.append((str(decoder.decode(data)[0][2][0][3][2])))
        except:
            # print "DEBUG\n" + str(ticket) + "DEBUG\n\n"
            return "FAIL" + str(label)
    elif data[:2] == '6d':
        for ticket in data.strip().split('\n'):
            try:
                enctickets.append((str(decoder.decode(ticket.decode('hex'))[0][4][3][2])))
            except:
                # print "DEBUG\n" + str(ticket) + "DEBUG\n\n"
                return "FAIL" + str(label)

    print "\nAccount: " + label

    for currentHash in hashList:
        ntlmHash_hex = binascii.unhexlify(currentHash)
        kdata, nonce = kerberos.decrypt(ntlmHash_hex, 2, enctickets[0])
        if kdata:
            print "NTLM Hash: " + currentHash
            break
    return ""
def aggress(map):
    global startMap
    startMap = map
    # print "Regressing..."
    state = State()
    jobs = []
    longestSolution = Value('d', 20)
    highestScore = Value('d', 0)
    queue = JoinableQueue()
    manager = Manager()
    d = manager.dict()
    d.clear()
    l = RLock()
    if multiProc:
        queue.put((state, map, 1))
        for i in range(numProcs):
            p = Process(target=multiMain, args=(startMap, l, d, queue, highestScore))
            p.start()
        queue.join()
    else:
        a(l, highestScore, d, None, state, map, 1)
def main(): # Hard-coded parameters needed for USGS datasets usgs_product_dict = { "ned": { "product": "National Elevation Dataset (NED)", "dataset": { "ned1sec": (1.0 / 3600, 30, 100), "ned13sec": (1.0 / 3600 / 3, 10, 30), "ned19sec": (1.0 / 3600 / 9, 3, 10), }, "subset": {}, "extent": ["1 x 1 degree", "15 x 15 minute"], "format": "IMG", "extension": "img", "zip": True, "srs": "wgs84", "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs", "interpolation": "bilinear", "url_split": "/", }, "nlcd": { "product": "National Land Cover Database (NLCD)", "dataset": { "National Land Cover Database (NLCD) - 2001": (1.0 / 3600, 30, 100), "National Land Cover Database (NLCD) - 2006": (1.0 / 3600, 30, 100), "National Land Cover Database (NLCD) - 2011": (1.0 / 3600, 30, 100), }, "subset": { "Percent Developed Imperviousness", "Percent Tree Canopy", "Land Cover", }, "extent": ["3 x 3 degree"], "format": "GeoTIFF", "extension": "tif", "zip": True, "srs": "wgs84", "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs", "interpolation": "nearest", "url_split": "/", }, "naip": { "product": "USDA National Agriculture Imagery Program (NAIP)", "dataset": { "Imagery - 1 meter (NAIP)": (1.0 / 3600 / 27, 1, 3) }, "subset": {}, "extent": [ "3.75 x 3.75 minute", ], "format": "JPEG2000", "extension": "jp2", "zip": False, "srs": "wgs84", "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs", "interpolation": "nearest", "url_split": "/", }, "lidar": { "product": "Lidar Point Cloud (LPC)", "dataset": { "Lidar Point Cloud (LPC)": (1.0 / 3600 / 9, 3, 10) }, "subset": {}, "extent": [""], "format": "LAS,LAZ", "extension": "las,laz", "zip": True, "srs": "", "srs_proj4": "+proj=longlat +ellps=GRS80 +datum=NAD83 +nodefs", "interpolation": "nearest", "url_split": "/", }, } # Set GRASS GUI options and flags to python variables gui_product = options["product"] # Variable assigned from USGS product dictionary nav_string = usgs_product_dict[gui_product] product = nav_string["product"] product_format = nav_string["format"] product_extensions = tuple(nav_string["extension"].split(",")) product_is_zip = nav_string["zip"] product_srs = nav_string["srs"] product_proj4 = nav_string["srs_proj4"] product_interpolation = nav_string["interpolation"] product_url_split = nav_string["url_split"] product_extent = nav_string["extent"] gui_subset = None # Parameter assignments for each dataset if gui_product == "ned": gui_dataset = options["ned_dataset"] ned_api_name = "" if options["ned_dataset"] == "ned1sec": ned_data_abbrv = "ned_1arc_" ned_api_name = "1 arc-second" if options["ned_dataset"] == "ned13sec": ned_data_abbrv = "ned_13arc_" ned_api_name = "1/3 arc-second" if options["ned_dataset"] == "ned19sec": ned_data_abbrv = "ned_19arc_" ned_api_name = "1/9 arc-second" product_tag = product + " " + ned_api_name if gui_product == "nlcd": gui_dataset = options["nlcd_dataset"] if options["nlcd_dataset"] == "nlcd2001": gui_dataset = "National Land Cover Database (NLCD) - 2001" if options["nlcd_dataset"] == "nlcd2006": gui_dataset = "National Land Cover Database (NLCD) - 2006" if options["nlcd_dataset"] == "nlcd2011": gui_dataset = "National Land Cover Database (NLCD) - 2011" if options["nlcd_subset"] == "landcover": gui_subset = "Land Cover" if options["nlcd_subset"] == "impervious": gui_subset = "Percent Developed Imperviousness" if options["nlcd_subset"] == "canopy": gui_subset = "Percent Tree Canopy" product_tag = gui_dataset if gui_product == "naip": gui_dataset = "Imagery - 1 meter (NAIP)" product_tag = 
nav_string["product"] has_pdal = gscript.find_program(pgm="v.in.pdal") if gui_product == "lidar": gui_dataset = "Lidar Point Cloud (LPC)" product_tag = nav_string["product"] if not has_pdal: gscript.warning( _("Module v.in.pdal is missing," " any downloaded data will not be processed.")) # Assigning further parameters from GUI gui_output_layer = options["output_name"] gui_resampling_method = options["resampling_method"] gui_i_flag = flags["i"] gui_k_flag = flags["k"] work_dir = options["output_directory"] memory = options["memory"] nprocs = options["nprocs"] preserve_extracted_files = gui_k_flag use_existing_extracted_files = True preserve_imported_tiles = gui_k_flag use_existing_imported_tiles = True if not os.path.isdir(work_dir): gscript.fatal( _("Directory <{}> does not exist." " Please create it.").format(work_dir)) # Returns current units try: proj = gscript.parse_command("g.proj", flags="g") if gscript.locn_is_latlong(): product_resolution = nav_string["dataset"][gui_dataset][0] elif float(proj["meters"]) == 1: product_resolution = nav_string["dataset"][gui_dataset][1] else: # we assume feet product_resolution = nav_string["dataset"][gui_dataset][2] except TypeError: product_resolution = False if gui_product == "lidar" and options["resolution"]: product_resolution = float(options["resolution"]) if gui_resampling_method == "default": gui_resampling_method = nav_string["interpolation"] gscript.verbose( _("The default resampling method for product {product} is {res}"). format(product=gui_product, res=product_interpolation)) # Get coordinates for current GRASS computational region and convert to USGS SRS gregion = gscript.region() wgs84 = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs" min_coords = gscript.read_command( "m.proj", coordinates=(gregion["w"], gregion["s"]), proj_out=wgs84, separator="comma", flags="d", ) max_coords = gscript.read_command( "m.proj", coordinates=(gregion["e"], gregion["n"]), proj_out=wgs84, separator="comma", flags="d", ) min_list = min_coords.split(",")[:2] max_list = max_coords.split(",")[:2] list_bbox = min_list + max_list str_bbox = ",".join((str(coord) for coord in list_bbox)) # Format variables for TNM API call gui_prod_str = str(product_tag) datasets = quote_plus(gui_prod_str) prod_format = quote_plus(product_format) prod_extent = quote_plus(product_extent[0]) # Create TNM API URL base_TNM = "https://tnmaccess.nationalmap.gov/api/v1/products?" datasets_TNM = "datasets={0}".format(datasets) bbox_TNM = "&bbox={0}".format(str_bbox) prod_format_TNM = "&prodFormats={0}".format(prod_format) TNM_API_URL = base_TNM + datasets_TNM + bbox_TNM + prod_format_TNM if gui_product == "nlcd": TNM_API_URL += "&prodExtents={0}".format(prod_extent) gscript.verbose("TNM API Query URL:\t{0}".format(TNM_API_URL)) # Query TNM API try_again_messge = _( "Possibly, the query has timed out. Check network configuration and try again." ) try: TNM_API_GET = urlopen(TNM_API_URL, timeout=12) except HTTPError as error: gscript.fatal( _("HTTP(S) error from USGS TNM API:" " {code}: {reason} ({instructions})").format( reason=error.reason, code=error.code, instructions=try_again_messge)) except (URLError, OSError, IOError) as error: # Catching also SSLError and potentially others which are # subclasses of IOError in Python 2 and of OSError in Python 3. 
gscript.fatal( _("Error accessing USGS TNM API: {error} ({instructions})").format( error=error, instructions=try_again_messge)) # Parse return JSON object from API query try: return_JSON = json.load(TNM_API_GET) if return_JSON["errors"]: TNM_API_error = return_JSON["errors"] api_error_msg = "TNM API Error - {0}".format(str(TNM_API_error)) gscript.fatal(api_error_msg) if gui_product == "lidar" and options["title_filter"]: return_JSON["items"] = [ item for item in return_JSON["items"] if options["title_filter"] in item["title"] ] return_JSON["total"] = len(return_JSON["items"]) except: gscript.fatal(_("Unable to load USGS JSON object.")) # Functions down_list() and exist_list() used to determine # existing files and those that need to be downloaded. def down_list(): dwnld_url.append(TNM_file_URL) dwnld_size.append(TNM_file_size) TNM_file_titles.append(TNM_file_title) if product_is_zip: extract_zip_list.append(local_zip_path) def exist_list(): exist_TNM_titles.append(TNM_file_title) exist_dwnld_url.append(TNM_file_URL) if product_is_zip: exist_zip_list.append(local_zip_path) extract_zip_list.append(local_zip_path) else: exist_tile_list.append(local_tile_path) # Assign needed parameters from returned JSON tile_API_count = int(return_JSON["total"]) tiles_needed_count = 0 # TODO: Make the tolerance configurable. # Some combinations produce >10 byte differences. size_diff_tolerance = 5 exist_dwnld_size = 0 if tile_API_count > 0: dwnld_size = [] dwnld_url = [] TNM_file_titles = [] exist_dwnld_url = [] exist_TNM_titles = [] exist_zip_list = [] exist_tile_list = [] extract_zip_list = [] # for each file returned, assign variables to needed parameters for f in return_JSON["items"]: TNM_file_title = f["title"] TNM_file_URL = str(f["downloadURL"]) TNM_file_size = int(f["sizeInBytes"]) TNM_file_name = TNM_file_URL.split(product_url_split)[-1] if gui_product == "ned": local_file_path = os.path.join(work_dir, ned_data_abbrv + TNM_file_name) local_zip_path = os.path.join(work_dir, ned_data_abbrv + TNM_file_name) local_tile_path = os.path.join(work_dir, ned_data_abbrv + TNM_file_name) else: local_file_path = os.path.join(work_dir, TNM_file_name) local_zip_path = os.path.join(work_dir, TNM_file_name) local_tile_path = os.path.join(work_dir, TNM_file_name) file_exists = os.path.exists(local_file_path) file_complete = None # If file exists, do not download, # but if incomplete (e.g. interupted download), redownload. if file_exists: existing_local_file_size = os.path.getsize(local_file_path) # if local file is incomplete if abs(existing_local_file_size - TNM_file_size) > size_diff_tolerance: gscript.verbose( _("Size of local file {filename} ({local_size}) differs" " from a file size specified in the API ({api_size})" " by {difference} bytes" " which is more than tolerance ({tolerance})." " It will be downloaded again.").format( filename=local_file_path, local_size=existing_local_file_size, api_size=TNM_file_size, difference=abs(existing_local_file_size - TNM_file_size), tolerance=size_diff_tolerance, )) # NLCD API query returns subsets that cannot be filtered before # results are returned. gui_subset is used to filter results. 
if not gui_subset: tiles_needed_count += 1 down_list() else: if gui_subset in TNM_file_title: tiles_needed_count += 1 down_list() else: continue else: if not gui_subset: tiles_needed_count += 1 exist_list() exist_dwnld_size += TNM_file_size else: if gui_subset in TNM_file_title: tiles_needed_count += 1 exist_list() exist_dwnld_size += TNM_file_size else: continue else: if not gui_subset: tiles_needed_count += 1 down_list() else: if gui_subset in TNM_file_title: tiles_needed_count += 1 down_list() continue # return fatal error if API query returns no results for GUI input elif tile_API_count == 0: gscript.fatal( _("TNM API ERROR or Zero tiles available for given input parameters." )) # number of files to be downloaded file_download_count = len(dwnld_url) # remove existing files from download lists for t in exist_TNM_titles: if t in TNM_file_titles: TNM_file_titles.remove(t) for url in exist_dwnld_url: if url in dwnld_url: dwnld_url.remove(url) # messages to user about status of files to be kept, removed, or downloaded if exist_zip_list: exist_msg = _( "\n{0} of {1} files/archive(s) exist locally and will be used by module." ).format(len(exist_zip_list), tiles_needed_count) gscript.message(exist_msg) # TODO: fix this way of reporting and merge it with the one in use if exist_tile_list: exist_msg = _( "\n{0} of {1} files/archive(s) exist locally and will be used by module." ).format(len(exist_tile_list), tiles_needed_count) gscript.message(exist_msg) # formats JSON size from bites into needed units for combined file size if dwnld_size: total_size = sum(dwnld_size) len_total_size = len(str(total_size)) if 6 < len_total_size < 10: total_size_float = total_size * 1e-6 total_size_str = str("{0:.2f}".format(total_size_float) + " MB") if len_total_size >= 10: total_size_float = total_size * 1e-9 total_size_str = str("{0:.2f}".format(total_size_float) + " GB") else: total_size_str = "0" # Prints 'none' if all tiles available locally if TNM_file_titles: TNM_file_titles_info = "\n".join(TNM_file_titles) else: TNM_file_titles_info = "none" # Formatted return for 'i' flag if file_download_count <= 0: data_info = "USGS file(s) to download: NONE" if gui_product == "nlcd": if tile_API_count != file_download_count: if tiles_needed_count == 0: nlcd_unavailable = ( "NLCD {0} data unavailable for input parameters". 
format(gui_subset)) gscript.fatal(nlcd_unavailable) else: data_info = ( "USGS file(s) to download:", "-------------------------", "Total download size:\t{size}", "Tile count:\t{count}", "USGS SRS:\t{srs}", "USGS tile titles:\n{tile}", "-------------------------", ) data_info = "\n".join(data_info).format( size=total_size_str, count=file_download_count, srs=product_srs, tile=TNM_file_titles_info, ) print(data_info) if gui_i_flag: gscript.info( _("To download USGS data, remove <i> flag, and rerun r.in.usgs.")) sys.exit() # USGS data download process if file_download_count <= 0: gscript.message(_("Extracting existing USGS Data...")) else: gscript.message(_("Downloading USGS Data...")) TNM_count = len(dwnld_url) download_count = 0 local_tile_path_list = [] local_zip_path_list = [] patch_names = [] # Download files for url in dwnld_url: # create file name by splitting name from returned url # add file name to local download directory if gui_product == "ned": file_name = ned_data_abbrv + url.split(product_url_split)[-1] local_file_path = os.path.join(work_dir, file_name) else: file_name = url.split(product_url_split)[-1] local_file_path = os.path.join(work_dir, file_name) try: # download files in chunks rather than write complete files to memory dwnld_req = urlopen(url, timeout=12) download_bytes = int(dwnld_req.info()["Content-Length"]) CHUNK = 16 * 1024 with open(local_file_path, "wb+") as local_file: count = 0 steps = int(download_bytes / CHUNK) + 1 while True: chunk = dwnld_req.read(CHUNK) gscript.percent(count, steps, 10) count += 1 if not chunk: break local_file.write(chunk) gscript.percent(1, 1, 1) local_file.close() download_count += 1 # determine if file is a zip archive or another format if product_is_zip: local_zip_path_list.append(local_file_path) else: local_tile_path_list.append(local_file_path) file_complete = "Download {0} of {1}: COMPLETE".format( download_count, TNM_count) gscript.info(file_complete) except URLError: gscript.fatal( _("USGS download request has timed out. Network or formatting error." 
)) except StandardError: cleanup_list.append(local_file_path) if download_count: file_failed = "Download {0} of {1}: FAILED".format( download_count, TNM_count) gscript.fatal(file_failed) # sets already downloaded zip files or tiles to be extracted or imported # our pre-stats for extraction are broken, collecting stats during used_existing_extracted_tiles_num = 0 removed_extracted_tiles_num = 0 old_extracted_tiles_num = 0 extracted_tiles_num = 0 if exist_zip_list: for z in exist_zip_list: local_zip_path_list.append(z) if exist_tile_list: for t in exist_tile_list: local_tile_path_list.append(t) if product_is_zip: if file_download_count == 0: pass else: gscript.message("Extracting data...") # for each zip archive, extract needed file files_to_process = len(local_zip_path_list) for i, z in enumerate(local_zip_path_list): # TODO: measure only for the files being unzipped gscript.percent(i, files_to_process, 10) # Extract tiles from ZIP archives try: with zipfile.ZipFile(z, "r") as read_zip: for f in read_zip.namelist(): if f.lower().endswith(product_extensions): extracted_tile = os.path.join(work_dir, str(f)) remove_and_extract = True if os.path.exists(extracted_tile): if use_existing_extracted_files: # if the downloaded file is newer # than the extracted on, we extract if os.path.getmtime( extracted_tile) < os.path.getmtime( z): remove_and_extract = True old_extracted_tiles_num += 1 else: remove_and_extract = False used_existing_extracted_tiles_num += 1 else: remove_and_extract = True if remove_and_extract: removed_extracted_tiles_num += 1 os.remove(extracted_tile) if remove_and_extract: extracted_tiles_num += 1 read_zip.extract(f, work_dir) if os.path.exists(extracted_tile): local_tile_path_list.append(extracted_tile) if not preserve_extracted_files: cleanup_list.append(extracted_tile) except IOError as error: cleanup_list.append(extracted_tile) gscript.fatal( _("Unable to locate or extract IMG file '{filename}'" " from ZIP archive '{zipname}': {error}").format( filename=extracted_tile, zipname=z, error=error)) gscript.percent(1, 1, 1) # TODO: do this before the extraction begins gscript.verbose( _("Extracted {extracted} new tiles and" " used {used} existing tiles").format( used=used_existing_extracted_tiles_num, extracted=extracted_tiles_num)) if old_extracted_tiles_num: gscript.verbose( _("Found {removed} existing tiles older" " than the corresponding downloaded archive").format( removed=old_extracted_tiles_num)) if removed_extracted_tiles_num: gscript.verbose( _("Removed {removed} existing tiles").format( removed=removed_extracted_tiles_num)) if gui_product == "lidar" and not has_pdal: gscript.fatal( _("Module v.in.pdal is missing," " cannot process downloaded data.")) # operations for extracted or complete files available locally # We are looking only for the existing maps in the current mapset, # but theoretically we could be getting them from other mapsets # on search path or from the whole location. User may also want to # store the individual tiles in a separate mapset. # The big assumption here is naming of the maps (it is a smaller # for the files in a dedicated download directory). 
used_existing_imported_tiles_num = 0 imported_tiles_num = 0 mapset = get_current_mapset() files_to_import = len(local_tile_path_list) process_list = [] process_id_list = [] process_count = 0 num_tiles = len(local_tile_path_list) with Manager() as manager: results = manager.dict() for i, t in enumerate(local_tile_path_list): # create variables for use in GRASS GIS import process LT_file_name = os.path.basename(t) LT_layer_name = os.path.splitext(LT_file_name)[0] # we are removing the files if requested even if we don't use them # do not remove by default with NAIP, there are no zip files if gui_product != "naip" and not preserve_extracted_files: cleanup_list.append(t) # TODO: unlike the files, we don't compare date with input if use_existing_imported_tiles and map_exists( "raster", LT_layer_name, mapset): patch_names.append(LT_layer_name) used_existing_imported_tiles_num += 1 else: in_info = _("Importing and reprojecting {name}" " ({count} out of {total})...").format( name=LT_file_name, count=i + 1, total=files_to_import) gscript.info(in_info) process_count += 1 if gui_product != "lidar": process = Process( name="Import-{}-{}-{}".format(process_count, i, LT_layer_name), target=run_file_import, kwargs=dict( identifier=i, results=results, input=t, output=LT_layer_name, resolution="value", resolution_value=product_resolution, extent="region", resample=product_interpolation, memory=memory, ), ) else: srs = options["input_srs"] process = Process( name="Import-{}-{}-{}".format(process_count, i, LT_layer_name), target=run_lidar_import, kwargs=dict( identifier=i, results=results, input=t, output=LT_layer_name, input_srs=srs if srs else None, ), ) process.start() process_list.append(process) process_id_list.append(i) # Wait for processes to finish when we reached the max number # of processes. if process_count == nprocs or i == num_tiles - 1: exitcodes = 0 for process in process_list: process.join() exitcodes += process.exitcode if exitcodes != 0: if nprocs > 1: gscript.fatal( _("Parallel import and reprojection failed." " Try running with nprocs=1.")) else: gscript.fatal( _("Import and reprojection step failed.")) for identifier in process_id_list: if "errors" in results[identifier]: gscript.warning(results[identifier]["errors"]) else: patch_names.append(results[identifier]["output"]) imported_tiles_num += 1 # Empty the process list process_list = [] process_id_list = [] process_count = 0 # no process should be left now assert not process_list assert not process_id_list assert not process_count gscript.verbose( _("Imported {imported} new tiles and" " used {used} existing tiles").format( used=used_existing_imported_tiles_num, imported=imported_tiles_num)) # if control variables match and multiple files need to be patched, # check product resolution, run r.patch # v.surf.rst lidar params rst_params = dict(tension=25, smooth=0.1, npmin=100) # Check that downloaded files match expected count completed_tiles_count = len(local_tile_path_list) if completed_tiles_count == tiles_needed_count: if len(patch_names) > 1: try: gscript.use_temp_region() # set the resolution if product_resolution: gscript.run_command("g.region", res=product_resolution, flags="a") if gui_product == "naip": for i in ("1", "2", "3", "4"): patch_names_i = [ name + "." + i for name in patch_names ] output = gui_output_layer + "." 
+ i gscript.run_command("r.patch", input=patch_names_i, output=output) gscript.raster_history(output) elif gui_product == "lidar": gscript.run_command( "v.patch", flags="nzb", input=patch_names, output=gui_output_layer, ) gscript.run_command("v.surf.rst", input=gui_output_layer, elevation=gui_output_layer, nprocs=nprocs, **rst_params) else: gscript.run_command("r.patch", input=patch_names, output=gui_output_layer) gscript.raster_history(gui_output_layer) gscript.del_temp_region() out_info = ("Patched composite layer '{0}' added" ).format(gui_output_layer) gscript.verbose(out_info) # Remove files if not -k flag if not preserve_imported_tiles: if gui_product == "naip": for i in ("1", "2", "3", "4"): patch_names_i = [ name + "." + i for name in patch_names ] gscript.run_command("g.remove", type="raster", name=patch_names_i, flags="f") elif gui_product == "lidar": gscript.run_command( "g.remove", type="vector", name=patch_names + [gui_output_layer], flags="f", ) else: gscript.run_command("g.remove", type="raster", name=patch_names, flags="f") except CalledModuleError: gscript.fatal("Unable to patch tiles.") temp_down_count = _( "{0} of {1} tiles successfully imported and patched").format( completed_tiles_count, tiles_needed_count) gscript.info(temp_down_count) elif len(patch_names) == 1: if gui_product == "naip": for i in ("1", "2", "3", "4"): gscript.run_command( "g.rename", raster=(patch_names[0] + "." + i, gui_output_layer + "." + i), ) elif gui_product == "lidar": if product_resolution: gscript.run_command("g.region", res=product_resolution, flags="a") gscript.run_command("v.surf.rst", input=patch_names[0], elevation=gui_output_layer, nprocs=nprocs, **rst_params) if not preserve_imported_tiles: gscript.run_command("g.remove", type="vector", name=patch_names[0], flags="f") else: gscript.run_command("g.rename", raster=(patch_names[0], gui_output_layer)) temp_down_count = _("Tile successfully imported") gscript.info(temp_down_count) else: gscript.fatal( _("No tiles imported successfully. Nothing to patch.")) else: gscript.fatal( _("Error in getting or importing the data (see above). Please retry." )) # Keep source files if 'k' flag active if gui_k_flag: src_msg = ( "<k> flag selected: Source tiles remain in '{0}'").format(work_dir) gscript.info(src_msg) # set appropriate color table if gui_product == "ned": gscript.run_command("r.colors", map=gui_output_layer, color="elevation") # composite NAIP if gui_product == "naip": gscript.use_temp_region() gscript.run_command("g.region", raster=gui_output_layer + ".1") gscript.run_command( "r.composite", red=gui_output_layer + ".1", green=gui_output_layer + ".2", blue=gui_output_layer + ".3", output=gui_output_layer, ) gscript.raster_history(gui_output_layer) gscript.del_temp_region()
createDatabase(aggregatedAndImputedDatebaseName)
dataset = getAllRecordsFromDatabase(databaseName)
lengthOfDataset = len(dataset)
dataGroupCount = int(lengthOfDataset / cpuCoreCount)
dataGroupCollection = list()
for i in range(cpuCoreCount):
    if i != cpuCoreCount - 1:
        subDataset = dataset[i * dataGroupCount:(i + 1) * dataGroupCount, :]
        dataGroupCollection.append(subDataset)
    else:
        subDataset = dataset[i * dataGroupCount:, :]
        dataGroupCollection.append(subDataset)

processDictionary = dict()
resultFromProcessesDictionary = dict()
with Manager() as manager:
    for i in range(cpuCoreCount):
        resultFromProcessesDictionary[i] = manager.list()
        resultList = resultFromProcessesDictionary.get(i)
        processDictionary[i] = Process(target=processSubDataset,
                                       args=(dataGroupCollection[i], resultList))
    for i in range(cpuCoreCount):
        processDictionary.get(i).start()
    for i in range(cpuCoreCount):
        processDictionary.get(i).join()

    datasetFromProcesses = list()
    for i in range(cpuCoreCount):
        datasetFromProcesses.extend(resultFromProcessesDictionary.get(i))
    print(len(datasetFromProcesses))
def pipeline_STEP1(cfg, logger, cpu_number):
    s = "Working with STEP 1: running MAFFT"
    print(s)
    logger.info(s)
    input_folder = cfg["input_folder"]
    output_folder = cfg["output_folder"]
    out_suffix = cfg["output_suffix"]
    create_result_folder(output_folder, logger)
    os.chdir(input_folder)
    unwanted_files = [
        x for x in os.listdir(os.getcwd()) if "_temp_aligned.fasta" in x
    ]
    # Remove files from a previously aborted run.
    for f in unwanted_files:
        os.remove(f)
    finished_files = [
        x.split(out_suffix)[0] for x in os.listdir(output_folder)
    ]
    starting_files = sorted([
        x for x in os.listdir(os.getcwd())
        if cfg["input_suffix"] in x
        and find_n_seqs(x, 1, cfg["upper_sequence_limit"])
        and "_core" not in x
        and "_addit" not in x
        and x.split(out_suffix)[0] not in finished_files
    ])
    for fasta in starting_files:
        is_fasta(fasta)
    # Size threshold for switching from small_fastas alignment to large_fastas
    size_threshold = cfg["MAFFT_upper_limit_addfragments"]
    small_fastas = [
        x for x in starting_files if find_n_seqs(x, 1, size_threshold + 1)
    ]
    logger.debug("there are {} small_fastas".format(len(small_fastas)))
    large_fastas = [
        x for x in starting_files if not find_n_seqs(x, 1, size_threshold + 1)
    ]
    logger.debug("there are {} large_fastas".format(len(large_fastas)))
    if small_fastas:
        manager = Manager()
        fastas = manager.Queue()
        result_dict = manager.dict()
        logger.info("Running Mafft on small fasta")
        process_future_fasta(
            STEP1.run_MAFFT_small,
            small_fastas,
            result_dict,
            fastas,
            cpu_number,
            logger,
            cfg,
            tqdm_desc="Mafft on small files",
        )
    if large_fastas:
        manager = Manager()
        fastas = manager.Queue()
        result_dict = manager.dict()
        logger.info("Running Mafft on large fasta")
        process_future_fasta(
            STEP1.run_MAFFT_large,
            large_fastas,
            result_dict,
            fastas,
            cpu_number,
            logger,
            cfg,
            tqdm_desc="Mafft on large files",
        )
    logger.info("STEP 1 finished")
def GSO(bounds, num_particles, max_iter, classifier, train_data, epochs, batch_size, mini_batch_size=None): """ Galactic Swarm Optimization: ---------------------------- A meta-heuristic algorithm insipred by the interplay of stars, galaxies and superclusters under the influence of gravity. Input: ------ M: integer number of galaxies bounds: bounds of the search space across each dimension [lower_bound, upper_bound] * dims We specify only lower_bound and upper_bound """ subswarm_bests = [] dims = sum([ np.prod(np.array(layer['weights']).shape) for layer in classifier.layers.values() ]) print("total number of weights -", dims) lb = bounds[0] ub = bounds[1] # lets set bounds across all dims bounds = [[lb, ub]] * dims manager = Manager() l = Lock() shared_list = manager.list() return_list = manager.list() shared_list = [np.random.uniform(lb, ub, dims), np.inf] all_processes = [] #pso_batch_size = train_data[0].shape[0]//M g_best_weights = None g_best_error = float("inf") classifiers = [ copy.deepcopy(classifier) for _ in range(mini_batch_size // batch_size) ] X_train, y_train = train_data if not mini_batch_size: mini_batch_size = X_train.shape[0] print('starting with gso_batch size - {}, mini_batch_size -{} '.format( batch_size, mini_batch_size)) # create N particles here swarm_inits = [] for j in range(mini_batch_size // batch_size): swarm_init = [] for _ in range(num_particles): swarm_init.append(np.random.uniform(lb, ub, (1, dims))) swarm_inits.append(swarm_init) for i in tqdm(range(epochs)): all_processes = [] sampler = sample_data(X_train, y_train, batch_size, mini_batch_size) for j in range(mini_batch_size // batch_size): pso_train_data = next(sampler) #initial= np.random.uniform(-10,10, 2) # initial starting location [x1,x2...] # swarm_init = [] # for _ in range(num_particles): # swarm_init.append(np.random.uniform(lb, ub, dims)) #pso_train_data = (data[0][k*batch_size:(k+1)*pso_batch_size], data[1][k*batch_size:(k+1)*pso_batch_size]) # print('started batch :',i) # print('train_data length :', len(pso_train_data)) #print('shape of swarm_inits[j][0]: ', swarm_inits[j][0].shape) swarm_init = np.array( [item.reshape(dims, 1) for item in swarm_inits[j]]) p = Process(target=PSO, args=(classifiers[j], bounds, max_iter, shared_list, return_list, l, None, swarm_init, pso_train_data)) all_processes.append(p) start(all_processes) stop(all_processes) #print('elements of return list: ', return_list) main_swarm_init = [item[0] for item in return_list] #swarm_inits = [item[1] for item in return_list] swarm_inits = [main_swarm_init for item in return_list] best_weights, best_error = PSO_purana(classifier, bounds, max_iter, swarm_init=main_swarm_init, train_data=train_data) if best_error < g_best_error: g_best_error = best_error g_best_weights = best_weights print('completed epoch {} --------> loss_value: {}'.format( i, best_error)) prev_index = 0 for layer_id, layer in classifier.layers.items(): num_elements = np.prod( layer['weights'].shape ) # we can cache this and pass it down or store it as layer.num_elements new_weights = g_best_weights[prev_index:prev_index + num_elements] layer['weights'] = new_weights.reshape( layer['weights'].shape ) # changing value midway can cause some error prev_index += num_elements return classifier
class DataLogger(metaclass=Singleton): """Stores and save various type of data under various forms.""" @staticmethod def _futures_callback(future: Future): """Called at future completion.""" if future.exception(): print( f"Future {future} raised the exception {repr(future.exception())}" ) @staticmethod def _push(managed, entry, value, time): """Push method called by the pool executors""" with managed.lockers[entry]: managed.data[entry][time] = value managed.counters[entry] += 1 for f in managed.on_push_callables[entry]: try: f(entry, managed.data[entry], path=managed.path) except Exception as e: logging.getLogger("datalogger").warning( f"{managed.name} DataLogger: function {f} of {entry} failed: {e}" ) @staticmethod def _dump(managed, entry): """Dump method called by the pool executors""" with managed.lockers[entry]: for f in managed.on_dump_callables[entry]: try: f(entry, managed.data[entry], path=managed.path) except Exception as e: logging.getLogger("datalogger").warning( f"{managed.name} DataLogger: function {f} of {entry} failed: {e}" ) @staticmethod def _reset(managed, entry): """Inner reset method called by the pool executor""" with managed.lockers[entry]: for f in managed.on_reset_callables[entry]: try: f(entry, managed.data[entry], path=managed.path) except Exception as e: logging.getLogger("datalogger").warning( f"{managed.name} DataLogger: function {f} of {entry} failed: {e}" ) managed.data[entry].clear() managed.counters[entry] = 0 def __init__(self): # Init and set attributes super(DataLogger, self).__init__() # Managed resources (accessible by remote threads or remote processes) self._manager = Manager() self._managed = self._manager.Namespace() self._managed.name = "data-logger" self._managed.path = "." self._managed.entries = self._manager.list() self._managed.data = self._manager.dict() self._managed.lockers = self._manager.dict() self._managed.counters = self._manager.dict() self._managed.on_push_callables = self._manager.dict() self._managed.on_reset_callables = self._manager.dict() self._managed.on_dump_callables = self._manager.dict() self.tick = datetime.datetime.now() self.futures = list() self.pool = ThreadPoolExecutor(max_workers=1) # Log logging.getLogger("datalogger").info( "{} DataLogger initialized!".format(self._managed.name)) def set_path(self, path): """Sets the root path of the logger. Used by all the handlers that write on disk. :param string path: A valid path to write the data in. """ if len(self._managed.lockers) != 0: raise Exception( "You tried to change logger path after having registered some entries." ) os.makedirs(path, exist_ok=True) self._managed.path = path def set_pool(self, pool, n_par=5): """Sets the executor to be used to call handlers. :param string pool: The type of executor to use to call handlers. Either "thread" or "process". :param int n_par: The number of executor to use. """ if len(self._managed.lockers) != 0: raise Exception( "You tried to pool after having registered some entries.") if pool == "thread": self.pool = ThreadPoolExecutor(max_workers=n_par) elif pool == "process": self.pool = ProcessPoolExecutor(max_workers=n_par) else: raise Exception(f"Unknown pool type `{pool}`") def set_name(self, name): """Sets the name of the logger. :param string name: Name of the logger """ self._managed.name = name def declare(self, entry, on_push_callables, on_dump_callables, on_reset_callables): """Register a recurring log entry. Registering an entry gives access to the `push`, `reset` and `dump` methods. 
Note that all the handlers must be able to handle the data that will be pushed. :param string entry: Name of the log entry. :param List[handlers] on_push_callables: Handlers called on data when `push` is called. :param List[handlers] on_reset_callables: Handlers called on data when `reset` is called. :param List[handlers] on_dump_callables: Handlers called on the data when `dump` is called. """ if entry in self._managed.entries: raise Exception("You tried to declare an existing log entry") self._managed.entries.append(entry) self._managed.lockers[entry] = self._manager.RLock() self._managed.data[entry] = self._manager.dict() self._managed.counters[entry] = 0 self._managed.on_push_callables[entry] = self._manager.list( on_push_callables) self._managed.on_reset_callables[entry] = self._manager.list( on_reset_callables) self._managed.on_dump_callables[entry] = self._manager.list( on_dump_callables) if os.path.dirname(entry) != "": os.makedirs(os.path.join(self._managed.path, os.path.dirname(entry)), exist_ok=True) def push(self, entry, value, time=None): """Append data to a recurring log. All handlers registered for the `on_push` event will be called. :param string entry: Name of the log entry :param Any value: Object containing the data to log. Should be of same type from call to call... :param int or None time: Date of the logging (epoch, iteration, tic ...). Will be used as key in the data dictionary. If `None`, the last data key plus one will be used. """ future = self.pool.submit( DataLogger._push, self._managed, entry, value, time if time is not None else self._managed.counters[entry]) future.add_done_callback(DataLogger._futures_callback) self.futures.append(future) def dump(self): """Calls handlers declared for `on_dump` event, for all registered log entries. """ for entry in self._managed.entries: future = self.pool.submit(DataLogger._dump, self._managed, entry) future.add_done_callback(DataLogger._futures_callback) self.futures.append(future) def reset(self, entry): """Resets the data of a recurring log entry. All handlers registered for the `on_reset` event will be called before the storage is emptied. :param string entry: name of the log entry. """ future = self.pool.submit(DataLogger._reset, self._managed, entry) future.add_done_callback(DataLogger._futures_callback) self.futures.append(future) def get_entry_length(self, entry): """Retrieves the number of data saved for a log entry. :param string entry: Name of the log entry :return: Number of data pieces in the entry storage :rtype: int """ return self._managed.counters[entry] def get_serie(self, entry): """Returns the data in a list ordered by keys. :param string entry: Name of the log entry :return: Serie of data ordered by key :rtype: List[any] """ return [i[1] for i in sorted(self._managed.data[entry].items())] def wait(self, log_durations=True): """Wait for the handling queue to be emptied. :param bool log_durations: Whether to log the wait duration. """ b = datetime.datetime.now() while True: self.futures = list(filter(lambda x: not x.done(), self.futures)) if self.futures: time.sleep(.1) else: break if log_durations: logging.getLogger("datalogger").info( f"{self._managed.name} DataLogger: Last wait occured {self.tick - b} ago." ) logging.getLogger("datalogger").info( f"{self._managed.name} DataLogger: Waited {datetime.datetime.now() - b} for completion." ) self.tick = datetime.datetime.now()
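DataLogger keeps each entry in a nested Manager dict and serializes handler calls with a per-entry RLock while an executor runs the pushes in the background. A stripped-down sketch of that locking scheme, using a hypothetical "loss" entry:

from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Manager


def _push(data, lock, entry, value, time_key):
    # The class above also runs its on_push handlers inside this critical section.
    with lock:
        data[entry][time_key] = value


if __name__ == "__main__":
    manager = Manager()
    data = manager.dict()
    data["loss"] = manager.dict()  # nested proxy, as DataLogger.declare() does
    lock = manager.RLock()
    with ThreadPoolExecutor(max_workers=2) as pool:
        futures = [pool.submit(_push, data, lock, "loss", 0.1 * i, i) for i in range(5)]
        for f in futures:
            f.result()  # surface exceptions, like _futures_callback does
    print(sorted(data["loss"].items()))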
def build_opts(opts): """Trigger a new process that builds the workflow graph, based on the input options.""" import os from pathlib import Path import logging import sys import gc import warnings from multiprocessing import set_start_method, Process, Manager from nipype import logging as nlogging from niworkflows.utils.misc import check_valid_fs_license set_start_method("forkserver") logging.addLevelName( 25, "IMPORTANT") # Add a new level between INFO and WARNING logging.addLevelName(15, "VERBOSE") # Add a new level between INFO and DEBUG logger = logging.getLogger("cli") def _warn_redirect(message, category, filename, lineno, file=None, line=None): logger.warning("Captured warning (%s): %s", category, message) warnings.showwarning = _warn_redirect # Precedence: --fs-license-file, $FS_LICENSE, default_license if opts.fs_license_file is not None: os.environ["FS_LICENSE"] = os.path.abspath(opts.fs_license_file) if not check_valid_fs_license(): raise RuntimeError( "ERROR: a valid license file is required for FreeSurfer to run. " "sMRIPrep looked for an existing license file at several paths, in this " "order: 1) command line argument ``--fs-license-file``; 2) ``$FS_LICENSE`` " "environment variable; and 3) the ``$FREESURFER_HOME/license.txt`` path. " "Get it (for free) by registering at https://" "surfer.nmr.mgh.harvard.edu/registration.html") # Retrieve logging level log_level = int(max(25 - 5 * opts.verbose_count, logging.DEBUG)) # Set logging logger.setLevel(log_level) nlogging.getLogger("nipype.workflow").setLevel(log_level) nlogging.getLogger("nipype.interface").setLevel(log_level) nlogging.getLogger("nipype.utils").setLevel(log_level) errno = 0 # Call build_workflow(opts, retval) with Manager() as mgr: retval = mgr.dict() p = Process(target=build_workflow, args=(opts, retval)) p.start() p.join() if p.exitcode != 0: sys.exit(p.exitcode) smriprep_wf = retval["workflow"] plugin_settings = retval["plugin_settings"] bids_dir = retval["bids_dir"] output_dir = retval["output_dir"] subject_list = retval["subject_list"] run_uuid = retval["run_uuid"] retcode = retval["return_code"] if smriprep_wf is None: sys.exit(1) if opts.write_graph: smriprep_wf.write_graph(graph2use="colored", format="svg", simple_form=True) if opts.reports_only: sys.exit(int(retcode > 0)) if opts.boilerplate: sys.exit(int(retcode > 0)) # Check workflow for missing commands missing = check_deps(smriprep_wf) if missing: print("Cannot run sMRIPrep. 
Missing dependencies:") for iface, cmd in missing: print("\t{} (Interface: {})".format(cmd, iface)) sys.exit(2) # Clean up master process before running workflow, which may create forks gc.collect() try: smriprep_wf.run(**plugin_settings) except RuntimeError: errno = 1 else: if opts.run_reconall: from templateflow import api from niworkflows.utils.misc import _copy_any dseg_tsv = str( api.get("fsaverage", suffix="dseg", extension=[".tsv"])) _copy_any( dseg_tsv, str(Path(output_dir) / "smriprep" / "desc-aseg_dseg.tsv")) _copy_any( dseg_tsv, str(Path(output_dir) / "smriprep" / "desc-aparcaseg_dseg.tsv")) logger.log(25, "sMRIPrep finished without errors") finally: from niworkflows.reports import generate_reports from ..utils.bids import write_derivative_description, write_bidsignore logger.log(25, "Writing reports for participants: %s", ", ".join(subject_list)) # Generate reports phase errno += generate_reports(subject_list, output_dir, run_uuid, packagename="smriprep") write_derivative_description(bids_dir, str(Path(output_dir) / "smriprep")) write_bidsignore(Path(output_dir) / "smriprep") sys.exit(int(errno > 0))
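build_opts builds the workflow in a child process and hands the results back through a Manager().dict(), so the heavy graph construction never touches the parent interpreter. A minimal sketch of that hand-off, with a stand-in build() function and hypothetical options:

from multiprocessing import Manager, Process


def build(opts, retval):
    # Stand-in for build_workflow(): do the heavy construction here and hand
    # only picklable results back through the managed dict.
    retval["workflow"] = "graph-for-{}".format(opts["subject"])
    retval["return_code"] = 0


if __name__ == "__main__":
    opts = {"subject": "01"}  # hypothetical options object
    with Manager() as mgr:
        retval = mgr.dict()
        p = Process(target=build, args=(opts, retval))
        p.start()
        p.join()
        if p.exitcode != 0:
            raise SystemExit(p.exitcode)
        workflow = retval["workflow"]  # copy out before the manager shuts down
    print(workflow)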
# Send frame to global write_frame_list[worker_id] = frame_process # Expect next worker to write frame Global.write_num = next_id(Global.write_num, worker_num) if __name__ == '__main__': # Fix Bug on MacOS if platform.system() == 'Darwin': set_start_method('forkserver') # Global variables Global = Manager().Namespace() Global.buff_num = 1 Global.read_num = 1 Global.write_num = 1 Global.frame_delay = 0 Global.is_exit = False read_frame_list = Manager().dict() write_frame_list = Manager().dict() # Number of workers (subprocess use to process frames) if cpu_count() > 2: worker_num = cpu_count() - 1 # 1 for capturing frames else: worker_num = 2 # Subprocess list
def main():
    print('blas -', dlib.DLIB_USE_BLAS)
    print('cuda -', dlib.DLIB_USE_CUDA)
    print('lapack -', dlib.DLIB_USE_LAPACK)
    print('avx -', dlib.USE_AVX_INSTRUCTIONS)
    print('neon -', dlib.USE_NEON_INSTRUCTIONS)
    manager = Manager()
    # number of faces
    count = Value('i', 0)
    time_det = Value('d', 0.0)
    time_count = Value('d', 0.0)
    dets_q = manager.Queue(1)
    images_q = manager.Queue(25)
    # queue for the detection process
    q_for_detproc = manager.Queue(1)
    # queue for the recognition and counting process
    q_for_countproc = manager.Queue(1)
    Process(target=counting_process, args=(q_for_countproc, count, time_count), daemon=True).start()
    #Process(target=capturing_process, args=(images_q, q_for_detproc), daemon=True).start()
    Process(target=detecting_process, args=(q_for_detproc, dets_q, q_for_countproc, time_det), daemon=True).start()
    font = cv2.FONT_HERSHEY_SIMPLEX
    counter = 0
    trackers = []
    cap = cv2.VideoCapture(0)
    # cap.set(3,900)
    # cap.set(4,900)
    while True:
        if cap.isOpened():
            img = cap.read()[1]
            #img = imutils.resize(img, width=1000)
            if images_q.full():
                images_q.get()
                images_q.put(img)
            else:
                images_q.put(img)
            if q_for_detproc.empty():
                q_for_detproc.put(img)
        else:
            break
        if images_q.qsize() == 24:
            frame = images_q.get()
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if counter % 25 == 0:
                counter = 0
                trackers = []
                if not dets_q.empty():
                    dets, rgb = dets_q.get()
                    for d in dets:
                        tracker = dlib.correlation_tracker()
                        tracker.start_track(rgb, d)
                        trackers.append(tracker)
            elif len(trackers) > 0:
                for tracker in trackers:
                    confidence = tracker.update(rgb)
                    if confidence > 7:
                        drect = tracker.get_position()
                        left, top, right, bottom = tuple(
                            map(int, (drect.left(), drect.top(), drect.right(), drect.bottom())))
                        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
            counter += 1
            height, width = frame.shape[:2]
            cv2.putText(frame, str(count.value), (width - 100, height - 100), font, 4, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow('img', frame)
            k = cv2.waitKey(25) & 0xff
            if k == 27:
                break
    print('detecting time = ', time_det.value)
    print('counting time = ', time_count.value)
    cap.release()
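The capture loop above uses a bounded Manager queue as a rolling frame buffer and drops the oldest frame when it is full. A compact sketch of that producer/consumer shape, with integers standing in for frames and a hypothetical consumer instead of the detection processes:

import time
from multiprocessing import Manager, Process, Value


def consumer(frames, counted):
    # Stand-in for the detection/counting processes: drain frames and count them.
    while True:
        frames.get()
        with counted.get_lock():
            counted.value += 1


if __name__ == "__main__":
    manager = Manager()
    frames = manager.Queue(25)  # bounded buffer, like images_q above
    counted = Value("i", 0)
    Process(target=consumer, args=(frames, counted), daemon=True).start()
    for i in range(100):  # stand-in for frames read from the camera
        if frames.full():
            frames.get()  # drop the oldest frame instead of blocking
        frames.put(i)
        time.sleep(0.01)
    print("frames consumed:", counted.value)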
print('Using default iterations', iterations)

# read the data file
data_array = np.loadtxt(filename, delimiter=',')

# number of training examples and features
number_of_examples = data_array.shape[0]
number_of_parameters = data_array.shape[1]

# add a column of ones to the data array so the loop over parameters is symmetric
ones = np.ones(number_of_examples)
data_array = np.insert(data_array, 0, values=ones, axis=1)

# initialize theta to a zeros array
theta = np.zeros(number_of_parameters)

parameters_split = np.array_split(list(range(len(theta))), 4)

if __name__ == '__main__':
    manager = Manager()
    results = manager.dict()
    start = time.time()
    for i in range(iterations):
        processes = []
        # one process per sublist of parameters to calculate the theta updates
        for j in range(len(parameters_split)):
            processes.append(Process(target=gradientDescent,
                                     args=(parameters_split[j], results)))
        theta_diff = []
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        # merge the per-parameter results
        for j in range(len(theta)):
            theta_diff.append(results[j])
def one_run(config): test_environment = environment.EnvRoad(config) road_environment = environment.EnvRoad(config) memory_queue = Queue() update_p_queue = Queue() batch_queue = Queue() environment_queue = Queue() test_environment_queue = Queue() agent_performance_queue = Queue() learner_performance_queue = Queue() test_agent_performance_queue = Queue() priority_environment_queue = Queue() manger_weights = Manager() weights = manger_weights.dict() lock_weights = manger_weights.Lock() p_agent = Process(target=thread_agent, args=(config, road_environment, memory_queue, environment_queue, agent_performance_queue, weights, lock_weights)) p_learn = Process(target=thread_learner, args=(config, batch_queue, learner_performance_queue, update_p_queue, weights, lock_weights)) p_mem = Process(target=thread_memory, args=(config, memory_queue, batch_queue, update_p_queue, priority_environment_queue)) p_agent_test = Process(target=thread_agent_test, args=(config, test_environment, test_environment_queue, test_agent_performance_queue, weights, lock_weights, memory_queue)) p_agent_priority = Process( target=thread_agent_priority, args=(config, test_environment, priority_environment_queue, memory_queue, test_environment_queue, weights, lock_weights)) start_time = time.time() p_agent.start() p_mem.start() p_learn.start() p_agent_test.start() p_agent_priority.start() if config.display: p_display_game = Process(target=thread_game_display, args=(road_environment, environment_queue)) p_display_test = Process(target=thread_test_display, args=(test_environment, test_environment_queue)) p_display_game.start() p_display_test.start() display_agent = displays.AgentDisplays(config) display_learner = displays.LearnerDisplays(config) display_test = displays.TestDisplays(config) learner_performance = [[], [], []] agent_performance = [[], []] test_agent_performance = [[], []] end = False # Display all performance metrics while True: while agent_performance_queue.qsize() > 0: agent_performance = agent_performance_queue.get() if len(agent_performance[0]) == len( agent_performance[1]) and config.display: display_agent.render(agent_performance[0], agent_performance[1]) if len(agent_performance[0]) > config.max_step: end = True while learner_performance_queue.qsize() > 0: learner_performance = learner_performance_queue.get() if len(learner_performance[0]) == len( learner_performance[1]) == len( learner_performance[2]) and config.display: display_learner.render(learner_performance[0], learner_performance[1], learner_performance[2]) while test_agent_performance_queue.qsize() > 0: test_agent_performance = test_agent_performance_queue.get() if len(test_agent_performance[0]) == len( test_agent_performance[1]) and config.display: display_test.render(test_agent_performance[0], test_agent_performance[1]) if end: if config.display: displays.close_all() p_display_game.terminate() p_display_test.terminate() break p_agent.terminate() p_mem.terminate() p_learn.terminate() p_agent_test.terminate() p_agent_priority.terminate() print('Average time per step: ', str((time.time() - start_time) / config.max_step)) return agent_performance[0], agent_performance[1], learner_performance[0], learner_performance[1], \ learner_performance[0], learner_performance[2], test_agent_performance[0], test_agent_performance[1]
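one_run shares the network weights between the learner and the actor processes through a Manager dict protected by a manager Lock. A stripped-down sketch of that hand-off, with placeholder weight values instead of real network parameters:

import time
from multiprocessing import Manager, Process


def learner(weights, lock, steps):
    for step in range(steps):
        with lock:  # publish a new snapshot atomically
            weights["step"] = step
            weights["w"] = [0.1 * step]  # stand-in for network parameters
        time.sleep(0.01)


def actor(weights, lock, steps):
    seen = -1
    while seen < steps - 1:
        with lock:
            seen = weights.get("step", -1)
        time.sleep(0.01)
    print("actor finished at learner step", seen)


if __name__ == "__main__":
    manager = Manager()
    weights = manager.dict()
    lock = manager.Lock()
    steps = 20
    procs = [Process(target=learner, args=(weights, lock, steps)),
             Process(target=actor, args=(weights, lock, steps))]
    for p in procs:
        p.start()
    for p in procs:
        p.join()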
import time
from multiprocessing import Queue, Manager

import numpy as npy

# MFCC and Play are project-specific classes (audio feature extraction and playback).


def weight():
    weight = []
    dis_with_w = []
    for i in range(0, 2000):
        weight.append(
            (npy.max(npy.std(vectors, axis=0)) - npy.std(vectors[:, i])) /
            (npy.sum(
                npy.max(npy.std(vectors, axis=0)) - npy.std(vectors, axis=0))))
    return weight


if __name__ == "__main__":
    manager = Manager()
    queue = manager.Queue()
    vectors = npy.ones((1, 2000))
    queue.put(vectors)
    for x in range(0, 40):  # 30 -> pos, 10 -> baseline
        mfcc = MFCC('/vibration.wav', queue)
        play = Play()
        play.start()
        time.sleep(0.16)
        mfcc.start()
        play.join()
        mfcc.join()
        time.sleep(1)
        if x == 29:
            print("Please lift your hand")
def main(): """ - Load all the Jupiter confuguration - Load DAG information. - Prepare all of the tasks based on given DAG information. - Prepare the list of children tasks for every parent task - Generating monitoring process for ``INPUT`` folder. - Generating monitoring process for ``OUTPUT`` folder. - If there are enough input files for the first task on the current node, run the first task. """ global logging logging.basicConfig(level=logging.DEBUG) INI_PATH = '/jupiter_config.ini' config = configparser.ConfigParser() config.read(INI_PATH) global dag dag_file = '/centralized_scheduler/dag.txt' dag_info = k8s_read_dag(dag_file) dag = dag_info[1] # Prepare transfer-runtime file: global runtime_sender_log, RUNTIME, TRANSFER, transfer_type RUNTIME = int(config['CONFIG']['RUNTIME']) TRANSFER = int(config['CONFIG']['TRANSFER']) global app_name, app_option app_name = os.environ['APP_NAME'] app_option = os.environ['APP_OPTION'] if TRANSFER == 0: transfer_type = 'scp' runtime_sender_log = open( os.path.join(os.path.dirname(__file__), 'runtime_transfer_sender.txt'), "w") s = "{:<10} {:<10} {:<10} {:<10} \n".format('Node_name', 'Transfer_Type', 'File_Path', 'Time_stamp') runtime_sender_log.write(s) runtime_sender_log.close() runtime_sender_log = open( os.path.join(os.path.dirname(__file__), 'runtime_transfer_sender.txt'), "a") #Node_name, Transfer_Type, Source_path , Time_stamp if RUNTIME == 1: global runtime_receiver_log runtime_receiver_log = open( os.path.join(os.path.dirname(__file__), 'runtime_transfer_receiver.txt'), "w") s = "{:<10} {:<10} {:<10} {:<10} \n".format('Node_name', 'Transfer_Type', 'File_path', 'Time_stamp') runtime_receiver_log.write(s) runtime_receiver_log.close() runtime_receiver_log = open( os.path.join(os.path.dirname(__file__), 'runtime_transfer_receiver.txt'), "a") #Node_name, Transfer_Type, Source_path , Time_stamp global FLASK_SVC, FLASK_DOCKER, MONGO_PORT, username, password, ssh_port, num_retries, task_mul, count_dict, self_ip, home_ips, home_ids FLASK_DOCKER = int(config['PORT']['FLASK_DOCKER']) FLASK_SVC = int(config['PORT']['FLASK_SVC']) MONGO_PORT = int(config['PORT']['MONGO_DOCKER']) username = config['AUTH']['USERNAME'] password = config['AUTH']['PASSWORD'] ssh_port = int(config['PORT']['SSH_SVC']) num_retries = int(config['OTHER']['SSH_RETRY_NUM']) self_ip = os.environ['OWN_IP'] home_nodes = os.environ['HOME_NODE'].split(' ') home_ids = [x.split(':')[0] for x in home_nodes] home_ips = [x.split(':')[1] for x in home_nodes] global taskmap, taskname, taskmodule, filenames, files_out, home_node_host_ports global all_nodes, all_nodes_ips, self_id, self_name, self_task global all_computing_nodes, all_computing_ips, node_ip_map, controller_id_map configs = json.load(open('/centralized_scheduler/config.json')) taskmap = configs["taskname_map"][sys.argv[len(sys.argv) - 1]] taskname = taskmap[0] global tasks, task_order, super_tasks, non_tasks tasks, task_order, super_tasks, non_tasks = get_taskmap() global controller_nondag, controller_ip_nondag controller_nondag = [] controller_ip_nondag = [] global all_nodes_list, all_nodes_ips_list all_nodes_list = os.environ['ALL_NODES'].split(':') all_nodes_ips_list = os.environ['ALL_NODES_IPS'].split(':') all_computing_nodes = os.environ["ALL_COMPUTING_NODES"].split(":") all_computing_ips = os.environ["ALL_COMPUTING_IPS"].split(":") all_nodes = all_computing_nodes + home_ids all_nodes_ips = all_computing_ips + home_ips global BOKEH_SERVER, BOKEH_PORT, BOKEH BOKEH_SERVER = config['BOKEH_LIST']['BOKEH_SERVER'] BOKEH_PORT = 
int(config['BOKEH_LIST']['BOKEH_PORT']) BOKEH = int(config['BOKEH_LIST']['BOKEH']) for idx, controller in enumerate(all_nodes_list): if controller in super_tasks: logging.debug(controller) logging.debug(all_nodes_ips_list[idx]) controller_nondag.append(controller) controller_ip_nondag.append(all_nodes_ips_list[idx]) all_nodes.append(controller) all_nodes_ips.append(all_nodes_ips_list[idx]) if taskmap[1] == True: taskmodule = __import__(taskname) #target port for SSHing into a container filenames = [] files_out = [] self_name = os.environ['NODE_NAME'] self_id = os.environ['NODE_ID'] self_task = os.environ['TASK'] controller_id_map = self_task + "#" + self_id home_node_host_ports = [x + ":" + str(FLASK_SVC) for x in home_ips] node_ip_map = dict(zip(all_nodes, all_nodes_ips)) global dest_node_host_port_list dest_node_host_port_list = [ ip + ":" + str(FLASK_SVC) for ip in all_computing_ips ] global task_price_cpu, task_node_summary, task_price_mem, task_price_queue, task_price_net manager = Manager() task_price_cpu = manager.dict() task_price_mem = manager.dict() task_price_queue = manager.dict() task_price_net = manager.dict() task_node_summary = manager.dict() global pass_time pass_time = dict() # Set up default value for task_node_summary: the task controller will perform the tasks also _thread.start_new_thread(push_controller_map, ()) web_server = MonitorRecv() web_server.run() if taskmap[1] == True: task_mul = manager.dict() count_dict = manager.dict() else: path_src = "/centralized_scheduler/" + taskname args = ' '.join(str(x) for x in taskmap[2:]) if os.path.isfile(path_src + ".py"): cmd = "python3 -u " + path_src + ".py " + args else: cmd = "sh " + path_src + ".sh " + args os.system(cmd)
import socket
from multiprocessing import Manager, Pool, cpu_count
from multiprocessing.pool import ThreadPool

# MYHandler, login_try and send_data are assumed to be defined elsewhere in this project.


def worker_process(server, dict_proxy, queue_proxy):
    thread_pool = ThreadPool(cpu_count() * 2)
    while True:
        connection, remote_address = server.accept()
        data = "".encode()
        login_try(connection, thread_pool, dict_proxy, queue_proxy, data)


if __name__ == '__main__':
    server = socket.socket()
    server.bind(('127.0.0.1', 9999))
    server.listen(1000)

    mgr = Manager()
    dict_proxy = mgr.dict()    # holds the clients that have connected
    queue_proxy = mgr.Queue()  # messages from clients are passed along through this queue

    n = cpu_count()            # number of CPU cores on this machine
    process_pool = Pool(n)
    for i in range(n - 1):     # make full use of the CPUs: one worker process per core
        # hand the listening server to the worker processes
        process_pool.apply_async(worker_process, args=(server, dict_proxy, queue_proxy))
    # use one process to send the queued messages out
    process_pool.apply_async(send_data, args=(dict_proxy, queue_proxy))
    process_pool.close()
    process_pool.join()
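worker_process receives the managed dict and queue through Pool.apply_async; a plain dict would merely be copied into each worker, and a multiprocessing.Queue cannot be passed to pool workers at all, which is why the Manager proxies are used. A minimal sketch of that arrangement with hypothetical producer/consumer tasks:

from multiprocessing import Manager, Pool


def producer(shared, queue, n):
    for i in range(n):
        shared[i] = i * i  # visible to the parent through the proxy
        queue.put(i)


def consumer(queue, n):
    return [queue.get() for _ in range(n)]


if __name__ == "__main__":
    mgr = Manager()
    shared = mgr.dict()
    queue = mgr.Queue()
    with Pool(2) as pool:
        r1 = pool.apply_async(producer, (shared, queue, 5))
        r2 = pool.apply_async(consumer, (queue, 5))
        r1.get()
        print(dict(shared), r2.get())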
def extract_descriptors_from_file_to_pickle(inputfile, outputfile, num_pos_sample=0): print("Working on: " + str(inputfile)) print(" ") s_read_seq = time.time() if reduce_by_similarity == 1: if "_reduced" in inputfile: print("File already reduced to be maximum 90 percent identical! Clear reduce_by_similarity!") input() elif ".txt" in inputfile: name = inputfile.replace('.txt', '') file_to_reduce = open(inputfile) lines = file_to_reduce.readlines() if num_pos_sample != 0: lines = lines[:round(sc_1*num_pos_sample)] line_number = len(lines) file_to_reduce.close() elif ".fasta" in inputfile: name = inputfile.replace('.fasta', '') lines = IO.read_fasta_file(inputfile) lines = [str(line) for line in lines] if num_pos_sample != 0: lines = lines[:round(sc_1*num_pos_sample)] line_number = len(lines) else: print("Unknown file format! Use .fasta or .txt! Press CTRL-C to exit") input() out = name + "_reduced.txt" deleted = [] sim_array = np.zeros((line_number, line_number)) for i in list(range(line_number)): print("Doing line %d out of %d" %(i, line_number)) string1 = lines[i].strip() for j in list(range(i+1, line_number)): #print(j) string2 = lines[j].strip() if similar(string1, string2) >= 0.9: sim_array[i,j] = 1 sim_array[j,i] = 1 while np.sum(np.sum(sim_array, 0)) != 0: sum_arr = np.sum(sim_array, 0) idx_to_be_deleted = np.argmax(sum_arr) sim_array = np.delete(sim_array, idx_to_be_deleted, 0) sim_array = np.delete(sim_array, idx_to_be_deleted, 1) deleted.append(lines[idx_to_be_deleted]) del lines[idx_to_be_deleted] print("Deleted items:") [print(item) for item in deleted] f = open(out, "w+") for line in lines: f.write(line) f.write("\n") f.close() inputfile = out if ".txt" in inputfile: seqs = [] with open(inputfile) as f: for line in f: seqs.append(line.strip()) #strip is important otherwis /n issue! inputfile = inputfile.replace("_reduced.txt", "") elif ".fasta" in inputfile: seqs = IO.read_fasta_file(inputfile) inputfile = inputfile.replace("_reduced.fasta", "") else: print("Unknown file format! Use .fasta or .txt! Press CTRL-C to exit") input() e_read_seq = time.time() print("Total time to read sequences: " + str(e_read_seq - s_read_seq)) print(str(len(seqs))) chars = set('ARNDCQEGHILKMFPSTWYV') if inputfile in negfile: if num_pos_sample == 0: print ("Error, use Ctrl-C to quit") input() print(num_pos_sample) if num_pos_sample > len(seqs): print("Warning: Class imbalance may not be achieved! 
Click any button to accept or CTRL-C to exit") input() a = random.sample(range(1, len(seqs)), round(sc_2*num_pos_sample)) #if total_samples is big, you may want to divide total_samples (by 18) and round it newseqs = [] i = 1 for number in a: print(i) if len(seqs[number]) > minlength and all((c in chars) for c in seqs[number].upper()): newseqs.append(seqs[number]) print(seqs[number]) i = i+1 if i > num_pos_sample: break seqs = newseqs #s_x_desc = time.time() dvecs = Manager().list() current_seq = Value('i', 1) dropped = 0 lock = Lock() seqs = [s.upper() for s in seqs] mask = [all((c in chars) for c in s) and len(s) > minlength for s in seqs] seqs = list(compress(seqs, mask)) total_samples = len(seqs) pool = Pool(numcores, initializer, (current_seq, dvecs, total_samples, lock)) s_parallel = time.time() pool.map(thefunction, seqs) e_parallel = time.time() #pool.close() #pool.join() print("Total time to extract descriptors: " + str(e_parallel - s_parallel)) if inputfile in posfile: num_pos_sample = len(dvecs) print("Number of positive samples: %d" %(num_pos_sample)) #e_x_desc = time.time() #print("Total time to extract descriptors: " + str(e_x_desc - s_x_desc)) print("Number of samples dropped due to meaningless characters: %d" %(dropped)) y = dvecs._callmethod('__getitem__', (slice(1, total_samples+1),)) #THIS IS THE SOLUTION TO MAKE PICKLE WORK!!!!!! IO.serialize_descriptor_vector(y, o_file=outputfile) return num_pos_sample
class ReaderWriterLock(object):
    def __init__(self):
        self.num_readers_lock = Manager().Lock()
        self.writers_lock = Manager().Lock()
        self.num_readers = 0
        self.now_writing = False

    def some_worker_is_reading(self):
        return self.num_readers > 0

    def some_worker_is_writing(self):
        return self.now_writing is True

    def lock_writing_and_reading(self):
        self.writers_lock.acquire()  # first things first - block all other writers
        self.now_writing = True  # block new readers who haven't started reading yet
        while self.some_worker_is_reading():  # let existing readers finish their homework
            time.sleep(0.05)

    def release_writing_and_reading(self):
        self.now_writing = False  # release readers - guarantee no readers starvation
        self.writers_lock.release()  # release writers

    def lock_writing(self):
        while self.now_writing:
            time.sleep(0.05)
        self.num_readers_lock.acquire()
        self.num_readers += 1
        self.num_readers_lock.release()

    def release_writing(self):
        self.num_readers_lock.acquire()
        self.num_readers -= 1
        self.num_readers_lock.release()
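A usage sketch of ReaderWriterLock follows. Note that num_readers and now_writing are plain attributes, so the coordination only holds for workers that share this exact object (for example threads of one process); the sketch therefore uses threads and only illustrates the intended call order:

import threading
import time


def read_job(rw, results):
    rw.lock_writing()  # register as a reader; blocks while a writer is active
    time.sleep(0.1)    # read the shared resource here
    results.append("read")
    rw.release_writing()


def write_job(rw, results):
    rw.lock_writing_and_reading()  # exclusive access: waits for readers to drain
    results.append("wrote")
    rw.release_writing_and_reading()


if __name__ == "__main__":
    rw = ReaderWriterLock()
    results = []
    threads = [threading.Thread(target=read_job, args=(rw, results)) for _ in range(3)]
    threads.append(threading.Thread(target=write_job, args=(rw, results)))
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    print(results)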
def sub_cmd_multisearch(args):
    if not (args.m and args.sc):
        exit(1)

    config = xq.get_strategy_config(args.sc)
    pprint.pprint(config)

    module_name = config["module_name"].replace("/", ".")
    class_name = config["class_name"]
    symbol = config['symbol']

    md = DBMD(args.m, kl.KLINE_DATA_TYPE_JSON)
    start_time, end_time = get_time_range(md, symbol, args.r)

    count = args.count
    cpus = cpu_count()
    print("count: %s, cpus: %s" % (count, cpus))

    result_q = Manager().Queue()  # only a Queue from a Manager works with Pool
    task_q = Manager().Queue()    # only a Queue from a Manager works with Pool
    for index in range(count):
        task_q.put(index)

    print('Parent process %s.' % os.getpid())
    p = Pool(cpus)
    for i in range(cpus):
        #p.apply_async(child_process_test, args=(i, task_q, result_q))
        p.apply_async(child_process, args=(i, task_q, result_q, args.m, config,
                                           module_name, class_name, start_time, end_time))
    print('Waiting for all subprocesses done...')
    p.close()

    start_time = datetime.now()
    result = []
    while len(result) < count:
        if result_q.empty():
            time.sleep(1)
        else:
            value = result_q.get()
            print("result value: ", value)
            result.append(value)
        sys.stdout.write(" %d/%d, cost: %s, progress: %g%% \r" % (
            len(result), count, datetime.now() - start_time,
            round((len(result) / count) * 100, 2)))
        sys.stdout.flush()
    print("")
    #print("result queue(len: %s)" % (result_q.qsize()))

    p.join()
    print('All subprocesses done.')

    sorted_rs = sorted(result, key=lambda x: x[1][0], reverse=True)
    for r in sorted_rs:
        #print("r: ", r)
        info = "%6s %30s %s " % r
        print(info)
import itertools
from multiprocessing import Manager, Process

# multipro() and the initial `articles` list are assumed to be defined elsewhere.


def do_work(in_queue, out_list):
    while True:
        art = in_queue.get()
        if art is None:  # None is the shutdown sentinel
            break
        result = multipro(art)
        out_list.append(result)


num_workers = 3
manager = Manager()
results = manager.list()
work = manager.Queue(num_workers)
pool_lst = []
for i in range(num_workers):
    p = Process(target=do_work, args=(work, results))
    pool_lst.append(p)
    p.start()

articles = articles * 5000000
articles = itertools.chain(articles, (None,) * num_workers)
for i in articles:
    work.put(i)
def __init__(self, host=None, username=None, password=None, fresh=False): # database self.host = host self.username = username self.password = password self.databaseName = 'XRP_Ledger' self.collectionsList = ['accounts', 'transactions'] self.collections = {} self.edgeCollectionsList = ['transactionOutput'] self.edgeCollections = {} # processes self.maxProcess = int(cpu_count() / 2) self.batchSize = 500 self.maxQueueSize = self.batchSize * self.maxProcess # queue self.accountsQueue = Manager().Queue(maxsize=self.maxQueueSize) self.transactionsQueue = Manager().Queue(maxsize=self.maxQueueSize) self.transactionsOutputQueue = Manager().Queue( maxsize=self.maxQueueSize) # tracking self.lastStoredSeq = None # create connection try: conn = Connection(arangoURL=host, username=username, password=password) except ConnectionError: print("Unable to establish connection to the database") sys.exit(1) # setup database try: db = conn.createDatabase(name=self.databaseName) except CreationError: db = conn[self.databaseName] if fresh: for collection in self.collectionsList + self.edgeCollectionsList: if db.hasCollection(collection): db.collections[collection].delete() db.reload() # setup collections for collection in self.collectionsList: if not db.hasCollection(collection): db.createCollection(name=collection, className='Collection') # setup edge collections for edge in self.edgeCollectionsList: if not db.hasCollection(edge): db.createCollection(name=edge, className='Edges') # set last processed ledger seq aql = "FOR tx IN transactions SORT tx.LedgerIndex DESC LIMIT 1 RETURN tx.LedgerIndex" queryResult = db.AQLQuery(aql, rawResults=True) if len(queryResult) > 0: self.lastStoredSeq = queryResult[0] # run the threads self.processes = [] for i in range(self.maxProcess): self.processes.append( BulkInsert(self.get_connection('accounts'), self.accountsQueue, self.batchSize)) self.processes.append( BulkInsert(self.get_connection('transactions'), self.transactionsQueue, self.batchSize)) self.processes.append( BulkInsert(self.get_connection('transactionOutput'), self.transactionsOutputQueue, self.batchSize)) for t in self.processes: t.start()
import os
from time import sleep
from multiprocessing import Manager, Pool


def fuc_w(num, queue):
    print('start {0}:{1}'.format(num, os.getpid()))
    for i in range(5):
        queue.put(i)
        sleep(1)
    print('finish %s' % num)


def fuc_r(num, queue):
    print('start {0}:{1}'.format(num, os.getpid()))
    if not queue.empty():
        for i in range(queue.qsize()):
            print(queue.get(block=True, timeout=5))
            sleep(1)
    print('finish %s' % num)


if __name__ == '__main__':
    print('main process:%s' % os.getpid())
    # set the queue size; leave it empty for an unbounded queue
    queue = Manager().Queue(5)
    pool = Pool(processes=5)
    pool.apply(func=fuc_w, args=('write', queue))
    pool.apply(func=fuc_r, args=('read', queue))
    pool.close()
    pool.join()
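The snippet above relies on the fact that only a Manager queue can be handed to Pool workers; passing a bare multiprocessing.Queue() through apply/apply_async fails with a RuntimeError because such queues may only be shared through inheritance. A minimal sketch of the same pattern:

from multiprocessing import Manager, Pool


def produce(q):
    for ch in "abc":
        q.put(ch)


def consume(q, n):
    return [q.get() for _ in range(n)]


if __name__ == "__main__":
    # A plain multiprocessing.Queue() here would raise a RuntimeError when
    # the task is shipped to the pool; the Manager queue proxy is picklable.
    q = Manager().Queue()
    with Pool(2) as pool:
        pool.apply(produce, (q,))
        print(pool.apply(consume, (q, 3)))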
if __name__ == "__main__": test_1 = False test_2 = True if test_1: mean_vals = [] for i in range(10): latency_dict = [] iter_val = 100 in_order(latency_dict, iter_val) mean_val = sum(latency_dict) / len(latency_dict) mean_vals.append(mean_val) out = sum(mean_vals) / len(mean_vals) print(mean_vals) print("MEAN ONE REQUEST", out) if test_2: manager = Manager() return_dict = manager.list() # pool_size = 100 # your "parallelness" # data to be sent to api num_requests = [1] for nr in num_requests: return_dict = manager.list() concurrent_vals(nr) max_val = test_max(return_dict) avg_val = test_avg(return_dict) # print(return_dict) print("NUM_REQUESTS", nr) print("MAX_VAL", max_val) print("AVG_VAL", avg_val)
def configure_parallel(**kwargs): basic_conf, modules_fn, summary_writer = basic_configure(**kwargs) jobdir = basic_conf['jobdir'] env_spec = basic_conf['env_spec'] T = env_spec['T'] seed = basic_conf['seed'] num_worker = basic_conf['num_worker'] env_fn = modules_fn['env_fn'] tasks_fn = modules_fn['tasks_fn'] policies_fn = modules_fn['policies_fn'] gnets_fn = modules_fn['gnets_fn'] task_selector_fn = modules_fn['task_selector_fn'] task_planner_fn = modules_fn['task_planner_fn'] forward_model_fn = modules_fn['forward_model_fn'] rollout_worker_fn = modules_fn['rollout_worker_fn'] ########################## # Continue training # ########################## restart_after = get_parameter('restart_after', params=kwargs, default=None) continued_params = {} if restart_after is not None and os.path.exists(os.path.join(jobdir, 'restart')): with open(os.path.join(jobdir, 'parallel_params.json'), 'r') as f: continued_params = json.load(f) ########################## # Load external config # ########################## parallel_params_path = get_parameter('basic_params_path', params=continued_params, default=None) parallel_params_path = get_parameter('basic_params_path', params=kwargs, default=parallel_params_path) external_params = {} if parallel_params_path is not None: with open(parallel_params_path, 'r') as f: external_params = json.load(f) ################################### # Prepare shared memory manager # ################################### manager = Manager() episode = manager.dict() info = manager.dict() managed_memory = dict( episode=episode, info=info, ) ########################## # Prepare rollout worker # ########################## parallel_rollout_manager_params = dict( num_worker=num_worker, ) update_default_params(parallel_params_path, external_params.get('parallel_rollout_manager_params', {})) update_default_params(parallel_params_path, continued_params.get('parallel_rollout_manager_params', {})) update_default_params(parallel_rollout_manager_params, kwargs.get('parallel_rollout_manager_params', {})) parallel_rollout_manager = ParallelRolloutManager(env_spec, env_fn, tasks_fn, policies_fn, gnets_fn, task_selector_fn, task_planner_fn, forward_model_fn, rollout_worker_fn, T, managed_memory=managed_memory, **parallel_rollout_manager_params) ########################## # Prepare train worker # ########################## parallel_train_manager = ParallelTrainManager(managed_memory=managed_memory, summary_writer=summary_writer, jobdir=jobdir, seed=seed) [parallel_train_manager.add_module(policy_fn) for policy_fn in policies_fn] [parallel_train_manager.add_module(task_fn) for task_fn in tasks_fn] parallel_train_manager.add_module(forward_model_fn) [parallel_train_manager.add_module(gnets_fn[i][j]) for i in range(len(tasks_fn)) for j in range(len(tasks_fn))] parallel_train_manager.add_module(task_selector_fn) parallel_train_manager.add_module(task_planner_fn) ########################## # Load external params # ########################## params_path = basic_conf['params_path'] params_prefix = basic_conf['params_prefix'] if params_path: params_path = params_path if params_path else jobdir try: parallel_train_manager.load_global_params(path=params_path, prefix=params_prefix) logger.info(f'Restored params from {params_path} with prefix {params_prefix}') except: logger.warning('Could not restore params') raise ########################## # Continue training # ########################## if basic_conf['restart_after'] is not None and os.path.exists(os.path.join(jobdir, 'restart')): try: 
parallel_train_manager.load_global_params(path=jobdir, prefix='latest') parallel_train_manager.restore_buffer(path=jobdir, prefix='latest') logger.info(f'Restored params from {params_path} with prefix {params_prefix}') except: logger.warning('Could not restore params') raise params = dict( parallel_rollout_manager_params=parallel_rollout_manager_params, ) with open(os.path.join(jobdir, 'parallel_params.json'), 'w') as f: json.dump(params, f) return parallel_rollout_manager, parallel_train_manager, basic_conf, params, modules_fn, managed_memory, summary_writer
def other(self, *args): self.request.sendall('error command: {}'.format(' '.join(args[0]))) def run_server(host, port, request_q_size=50): socketserver.ThreadingTCPServer.allow_reuse_address = True server = socketserver.ThreadingTCPServer((host, port), MYHandler) server.request_queue_size = request_q_size print('start server on {}:{}'.format(host, port)) with server: server.serve_forever() if __name__ == '__main__': manager = Manager() queue = manager.Queue(5) error_ip_list = manager.list() process_list = [] pull_ip_num = 50 order_id = '' api_url = 'http://dps.kdlapi.com/api/getdps/?orderid={}&num={}&area=%E6%B2%B3%E5%8D%97%2C%E5%90%89%E6%9E%97%2C%E5%B1%B1%E8%A5%BF%2C%E5%B1%B1%E4%B8%9C%2C%E6%B9%96%E5%8C%97%2C%E5%86%85%E8%92%99%E5%8F%A4%2C%E5%AE%89%E5%BE%BD%2C%E7%94%98%E8%82%83%2C%E5%AE%81%E5%A4%8F%2C%E5%9B%9B%E5%B7%9D%2C%E5%B9%BF%E8%A5%BF&pt=1&dedup=1&sep=2'.format( order_id, pull_ip_num) signature = '' check_ip_expired_url = 'https://dps.kdlapi.com/api/checkdpsvalid?orderid={}&signature={}&proxy='.format(order_id, signature) get_ip_balance = 'https://dps.kdlapi.com/api/getipbalance?orderid={}&signature={}'.format(order_id, signature) operate_ip = OperateIP(api_url, check_ip_expired_url, get_ip_balance) get_ip_process = Process(target=operate_ip.get_ip_list) process_list.append(get_ip_process) check_ip_process = Process(target=operate_ip.check_error_ip)
def Run_model(data, D, Choose_P, training_step, temp, d, detection_key, local_K): if detection_key == 1: D = 1 print('Start point-to-point anomaly detection method... ') if len(Choose_P) == 0: print('Detection target: all variables!') else: print('Detection target: specific variables!') if detection_key == 2: print('Start interval anomaly detection method... ') if len(Choose_P) == 0: print('Detection target: all variables!') else: print('Detection target: specific variables!') if detection_key == 3: D = 1 print('Start local anomaly detection method... ') if len(Choose_P) == 0: print('Detection target: all variables!') else: print('Detection target: specific variables!') data = Normalize_function(data) # Data normalization data = del_zero_matrix(data) # Delete useless variables N = len(data[0]) #Data length, which is the number of nodes K_temp = np.identity(N) # Initialize a unit vector P = len(data) #Number of variables if len( Choose_P ) == 0: #If it is empty, it means that there is no choice, and all are selected by default Choose_P = np.arange( 0, P) #Select the variable information to be focused on print('The system detects ' + str(len(Choose_P)) + 'target variables and starts ' + str(len(Choose_P)) + 'processes ...') jobs = [] #Storage process common_data = Manager().list( ) # Here is a shared variable that declares a list for i in range(len(Choose_P)): #Start the corresponding process p = Process(target=Aligned_kernel_matrix, args=(i, data, N, D, P, Choose_P[i], temp, detection_key, local_K, common_data)) #Share the common_data variable jobs.append(p) p.start() #Start process for proc in jobs: proc.join( ) #Use blocking to wait for all processes to end before proceeding K_temp = np.identity(N) # Initialize a unit vector for i in range(len(Choose_P)): K_temp = np.matmul(K_temp, common_data[i]) #Multiply all matrices S = Normalize_matrix(K_temp) # Normalized print('The calculation is over... ') c = np.array([1] * N) c = Train_model(S, c, d, N, training_step) return c
def fill_volume_with_model(model_file, volume, resume_prediction=None, checkpoint_filename=None, checkpoint_label_interval=20, seed_generator='sobel', background_label_id=0, bias=True, move_batch_size=1, max_moves=None, max_bodies=None, num_workers=CONFIG.training.num_gpus, worker_prequeue=1, filter_seeds_by_mask=True, reject_non_seed_components=True, reject_early_termination=False, remask_interval=None, shuffle_seeds=True): subvolume = volume.get_subvolume( SubvolumeBounds(start=np.zeros(3, dtype=np.int64), stop=volume.shape)) # Create an output label volume. if resume_prediction is None: prediction = np.full_like(subvolume.image, background_label_id, dtype=np.uint64) label_id = 0 else: if resume_prediction.shape != subvolume.image.shape: raise ValueError('Resume volume prediction is wrong shape.') prediction = resume_prediction prediction.flags.writeable = True label_id = prediction.max() # Create a conflict count volume that tracks locations where segmented # bodies overlap. For now the first body takes precedence in the # predicted labels. conflict_count = np.full_like(prediction, 0, dtype=np.uint32) def worker(worker_id, set_devices, model_file, image, seeds, results, lock, revoked): lock.acquire() import tensorflow as tf if set_devices: # Only make one GPU visible to Tensorflow so that it does not allocate # all available memory on all devices. # See: https://stackoverflow.com/questions/37893755 os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' os.environ['CUDA_VISIBLE_DEVICES'] = str(worker_id) with tf.device('/gpu:0'): # Late import to avoid Keras import until TF bindings are set. from .network import load_model logging.debug('Worker %s: loading model', worker_id) model = load_model(model_file, CONFIG.network) lock.release() def is_revoked(test_seed): ret = False lock.acquire() if tuple(test_seed) in revoked: ret = True revoked.remove(tuple(test_seed)) lock.release() return ret while True: seed = seeds.get(True) if not isinstance(seed, np.ndarray): logging.debug('Worker %s: got DONE', worker_id) break if is_revoked(seed): results.put((seed, None)) continue def stopping_callback(region): stop = is_revoked(seed) if reject_non_seed_components and \ region.bias_against_merge and \ region.mask[tuple(region.seed_vox)] < 0.5: stop = True return stop logging.debug('Worker %s: got seed %s', worker_id, np.array_str(seed)) # Flood-fill and get resulting mask. # Allow reading outside the image volume bounds to allow segmentation # to fill all the way to the boundary. region = Region(image, seed_vox=seed, sparse_mask=True, block_padding='reflect') region.bias_against_merge = bias early_termination = False try: six.next( region.fill(model, move_batch_size=move_batch_size, max_moves=max_moves, progress=2 + worker_id, stopping_callback=stopping_callback, remask_interval=remask_interval)) except Region.EarlyFillTermination: early_termination = True except StopIteration: pass if reject_early_termination and early_termination: body = None else: body = region.to_body() logging.debug('Worker %s: seed %s filled', worker_id, np.array_str(seed)) results.put((seed, body)) # Generate seeds from volume. 
generator = preprocessing.SEED_GENERATORS[seed_generator] seeds = generator(subvolume.image, CONFIG.volume.resolution) if filter_seeds_by_mask and volume.mask_data is not None: seeds = [ s for s in seeds if volume.mask_data[tuple(volume.world_coord_to_local(s))] ] pbar = tqdm(desc='Seed queue', total=len(seeds), miniters=1, smoothing=0.0) label_pbar = tqdm(desc='Labeled vox', total=prediction.size, miniters=1, smoothing=0.0, position=1) num_seeds = len(seeds) if shuffle_seeds: random.shuffle(seeds) seeds = iter(seeds) manager = Manager() # Queue of seeds to be picked up by workers. seed_queue = manager.Queue() # Queue of results from workers. results_queue = manager.Queue() # Dequeue of seeds that were put in seed_queue but have not yet been # combined by the main process. dispatched_seeds = deque() # Seeds that were placed in seed_queue but subsequently covered by other # results before their results have been processed. This allows workers to # abort working on these seeds by checking this list. revoked_seeds = manager.list() # Results that have been received by the main process but have not yet # been combined because they were not received in the dispatch order. unordered_results = {} def queue_next_seed(): total = 0 for seed in seeds: if prediction[seed[0], seed[1], seed[2]] != background_label_id: # This seed has already been filled. total += 1 continue dispatched_seeds.append(seed) seed_queue.put(seed) break return total for _ in range(min(num_seeds, num_workers * worker_prequeue)): processed_seeds = queue_next_seed() pbar.update(processed_seeds) if 'CUDA_VISIBLE_DEVICES' in os.environ: set_devices = False num_workers = 1 logging.warn( 'Environment variable CUDA_VISIBLE_DEVICES is set, so only one worker can be used.\n' 'See https://github.com/aschampion/diluvian/issues/11') else: set_devices = True workers = [] loading_lock = manager.Lock() for worker_id in range(num_workers): w = Process(target=worker, args=(worker_id, set_devices, model_file, subvolume.image, seed_queue, results_queue, loading_lock, revoked_seeds)) w.start() workers.append(w) last_checkpoint_label = label_id # For each seed, create region, fill, threshold, and merge to output volume. while dispatched_seeds: processed_seeds = 1 expected_seed = dispatched_seeds.popleft() logging.debug('Expecting seed %s', np.array_str(expected_seed)) if tuple(expected_seed) in unordered_results: logging.debug('Expected seed %s is in old results', np.array_str(expected_seed)) seed = expected_seed body = unordered_results[tuple(seed)] del unordered_results[tuple(seed)] else: seed, body = results_queue.get(True) processed_seeds += queue_next_seed() while not np.array_equal(seed, expected_seed): logging.debug('Seed %s is early, stashing', np.array_str(seed)) unordered_results[tuple(seed)] = body seed, body = results_queue.get(True) processed_seeds += queue_next_seed() logging.debug('Processing seed at %s', np.array_str(seed)) pbar.set_description('Seed ' + np.array_str(seed)) pbar.update(processed_seeds) if prediction[seed[0], seed[1], seed[2]] != background_label_id: # This seed has already been filled. 
logging.debug( 'Seed (%s) was filled but has been covered in the meantime.', np.array_str(seed)) loading_lock.acquire() if tuple(seed) in revoked_seeds: revoked_seeds.remove(tuple(seed)) loading_lock.release() continue if body is None: logging.debug('Body was None.') continue if reject_non_seed_components and not body.is_seed_in_mask(): logging.debug('Seed (%s) is not in its body.', np.array_str(seed)) continue if reject_non_seed_components: mask, bounds = body.get_seeded_component( CONFIG.postprocessing.closing_shape) else: mask, bounds = body._get_bounded_mask() body_size = np.count_nonzero(mask) if body_size == 0: logging.debug('Body was empty.') continue # Generate a label ID for this region. label_id += 1 if label_id == background_label_id: label_id += 1 logging.debug('Adding body to prediction label volume.') bounds_shape = list(map(slice, bounds[0], bounds[1])) prediction_mask = prediction[bounds_shape] == background_label_id for seed in dispatched_seeds: if np.all(bounds[0] <= seed) and np.all( bounds[1] > seed) and mask[tuple(seed - bounds[0])]: loading_lock.acquire() if tuple(seed) not in revoked_seeds: revoked_seeds.append(tuple(seed)) loading_lock.release() conflict_count[bounds_shape][np.logical_and( np.logical_not(prediction_mask), mask)] += 1 label_shape = np.logical_and(prediction_mask, mask) prediction[bounds_shape][np.logical_and(prediction_mask, mask)] = label_id label_pbar.set_description('Label {}'.format(label_id)) label_pbar.update(np.count_nonzero(label_shape)) logging.info('Filled seed (%s) with %s voxels labeled %s.', np.array_str(seed), body_size, label_id) if max_bodies and label_id >= max_bodies: # Drain the queues. while not seed_queue.empty(): seed_queue.get_nowait() break if checkpoint_filename is not None and label_id - last_checkpoint_label > checkpoint_label_interval: config = HDF5Volume.write_file(checkpoint_filename + '.hdf5', CONFIG.volume.resolution, label_data=prediction) config['name'] = 'segmentation checkpoint' with open(checkpoint_filename + '.toml', 'wb') as tomlfile: tomlfile.write('# Filling model: {}\n'.format(model_file)) tomlfile.write(str(toml.dumps({'dataset': [config]}))) for _ in range(num_workers): seed_queue.put('DONE') for wid, worker in enumerate(workers): worker.join() manager.shutdown() label_pbar.close() pbar.close() return prediction, conflict_count
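fill_volume_with_model coordinates its workers with a Manager queue of seeds, a results queue, and a manager.list() of revoked seeds that workers consult before doing any work, then shuts everything down with 'DONE' sentinels. A condensed sketch of that control flow with toy tasks in place of flood-fill regions:

from multiprocessing import Manager, Process


def worker(tasks, results, revoked):
    while True:
        item = tasks.get()
        if item == "DONE":
            break
        if item in revoked:  # another result already covered this task
            results.put((item, None))
            continue
        results.put((item, item * item))  # stand-in for the flood-fill result


if __name__ == "__main__":
    manager = Manager()
    tasks, results, revoked = manager.Queue(), manager.Queue(), manager.list()
    procs = [Process(target=worker, args=(tasks, results, revoked)) for _ in range(2)]
    for p in procs:
        p.start()
    revoked.append(3)  # pretend task 3 became redundant
    for i in range(5):
        tasks.put(i)
    for _ in range(2):
        tasks.put("DONE")
    collected = [results.get() for _ in range(5)]
    for p in procs:
        p.join()
    manager.shutdown()
    print(sorted(collected))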
def store_thumbnails(image_file, thumbnail_dictionary): colors = process_images(image_file) # print(colors) if colors: # print(colors) for item in colors: if item[0] in thumbnail_dictionary: thumbnail_dictionary[item[0]] += [image_file] else: thumbnail_dictionary[item[0]] = [image_file] if __name__ == "__main__": manager = Manager() image_thumbnail_dictionary = manager.dict() picture_folder_name = sys.argv[1] current_folder = os.getcwd() base_folder_name = os.path.basename(picture_folder_name) data_json_file = os.path.join(current_folder + os.sep + "data" + os.sep + str(base_folder_name + "_data.json")) f = find_pictures(picture_folder_name) # print(f) # print(image_thumbnail_dictionary) pool = Pool(cpu_count())
from multiprocessing import Manager, Pool
import os, time, random


def reader(q):
    print('reader started: pid {0}, parent pid {1}'.format(os.getpid(), os.getppid()))
    for i in range(q.qsize()):
        time.sleep(1)
        print('reader got message {} from the queue'.format(q.get()))


def writer(q):
    print('writer started: pid {0}, parent pid {1}'.format(os.getpid(), os.getppid()))
    for i in 'abcdefg':
        q.put(i)


if __name__ == '__main__':
    p = Pool()
    q = Manager().Queue()  # shared queue
    p.apply_async(writer, args=(q, ))
    time.sleep(1)
    p.apply_async(reader, args=(q, ))
    p.close()
    p.join()
class SQLDB: PRAGMAS = """ pragma journal_mode=WAL; """ CREATE_CLAIM_TABLE = """ create table if not exists claim ( claim_hash bytes primary key, claim_id text not null, claim_name text not null, normalized text not null, txo_hash bytes not null, tx_position integer not null, amount integer not null, timestamp integer not null, -- last updated timestamp creation_timestamp integer not null, height integer not null, -- last updated height creation_height integer not null, activation_height integer, expiration_height integer not null, release_time integer not null, short_url text not null, -- normalized#shortest-unique-claim_id canonical_url text, -- channel's-short_url/normalized#shortest-unique-claim_id-within-channel title text, author text, description text, claim_type integer, reposted integer default 0, -- streams stream_type text, media_type text, fee_amount integer default 0, fee_currency text, duration integer, -- reposts reposted_claim_hash bytes, -- claims which are channels public_key_bytes bytes, public_key_hash bytes, claims_in_channel integer, -- claims which are inside channels channel_hash bytes, channel_join integer, -- height at which claim got valid signature / joined channel signature bytes, signature_digest bytes, signature_valid bool, effective_amount integer not null default 0, support_amount integer not null default 0, trending_group integer not null default 0, trending_mixed integer not null default 0, trending_local integer not null default 0, trending_global integer not null default 0 ); create index if not exists claim_normalized_idx on claim (normalized, activation_height); create index if not exists claim_channel_hash_idx on claim (channel_hash, signature, claim_hash); create index if not exists claim_claims_in_channel_idx on claim (signature_valid, channel_hash, normalized); create index if not exists claim_txo_hash_idx on claim (txo_hash); create index if not exists claim_activation_height_idx on claim (activation_height, claim_hash); create index if not exists claim_expiration_height_idx on claim (expiration_height); create index if not exists claim_reposted_claim_hash_idx on claim (reposted_claim_hash); """ CREATE_SUPPORT_TABLE = """ create table if not exists support ( txo_hash bytes primary key, tx_position integer not null, height integer not null, claim_hash bytes not null, amount integer not null ); create index if not exists support_claim_hash_idx on support (claim_hash, height); """ CREATE_TAG_TABLE = """ create table if not exists tag ( tag text not null, claim_hash bytes not null, height integer not null ); create unique index if not exists tag_claim_hash_tag_idx on tag (claim_hash, tag); """ CREATE_CLAIMTRIE_TABLE = """ create table if not exists claimtrie ( normalized text primary key, claim_hash bytes not null, last_take_over_height integer not null ); create index if not exists claimtrie_claim_hash_idx on claimtrie (claim_hash); """ SEARCH_INDEXES = """ -- used by any tag clouds create index if not exists tag_tag_idx on tag (tag, claim_hash); -- naked order bys (no filters) create unique index if not exists claim_release_idx on claim (release_time, claim_hash); create unique index if not exists claim_trending_idx on claim (trending_group, trending_mixed, claim_hash); create unique index if not exists claim_effective_amount_idx on claim (effective_amount, claim_hash); -- claim_type filter + order by create unique index if not exists claim_type_release_idx on claim (release_time, claim_type, claim_hash); create unique index if not exists 
claim_type_trending_idx on claim (trending_group, trending_mixed, claim_type, claim_hash);
        create unique index if not exists claim_type_effective_amount_idx on claim (effective_amount, claim_type, claim_hash);

        -- stream_type filter + order by
        create unique index if not exists stream_type_release_idx on claim (stream_type, release_time, claim_hash);
        create unique index if not exists stream_type_trending_idx on claim (stream_type, trending_group, trending_mixed, claim_hash);
        create unique index if not exists stream_type_effective_amount_idx on claim (stream_type, effective_amount, claim_hash);

        -- channel_hash filter + order by
        create unique index if not exists channel_hash_release_idx on claim (channel_hash, release_time, claim_hash);
        create unique index if not exists channel_hash_trending_idx on claim (channel_hash, trending_group, trending_mixed, claim_hash);
        create unique index if not exists channel_hash_effective_amount_idx on claim (channel_hash, effective_amount, claim_hash);

        -- duration filter + order by
        create unique index if not exists duration_release_idx on claim (duration, release_time, claim_hash);
        create unique index if not exists duration_trending_idx on claim (duration, trending_group, trending_mixed, claim_hash);
        create unique index if not exists duration_effective_amount_idx on claim (duration, effective_amount, claim_hash);

        -- fee_amount filter + order by
        create unique index if not exists fee_amount_release_idx on claim (fee_amount, release_time, claim_hash);
        create unique index if not exists fee_amount_trending_idx on claim (fee_amount, trending_group, trending_mixed, claim_hash);
        create unique index if not exists fee_amount_effective_amount_idx on claim (fee_amount, effective_amount, claim_hash);

        -- TODO: verify that all indexes below are used
        create index if not exists claim_height_normalized_idx on claim (height, normalized asc);
        create index if not exists claim_resolve_idx on claim (normalized, claim_id);
        create index if not exists claim_id_idx on claim (claim_id, claim_hash);
        create index if not exists claim_timestamp_idx on claim (timestamp);
        create index if not exists claim_public_key_hash_idx on claim (public_key_hash);
        create index if not exists claim_signature_valid_idx on claim (signature_valid);
    """

    TAG_INDEXES = '\n'.join(
        f"create unique index if not exists tag_{tag_key}_idx on tag (tag, claim_hash) WHERE tag='{tag_value}';"
        for tag_value, tag_key in COMMON_TAGS.items()
    )

    CREATE_TABLES_QUERY = (
        CREATE_CLAIM_TABLE +
        CREATE_FULL_TEXT_SEARCH +
        CREATE_SUPPORT_TABLE +
        CREATE_CLAIMTRIE_TABLE +
        CREATE_TAG_TABLE
    )

    def __init__(self, main, path: str, blocking_channels: list, filtering_channels: list, trending: list):
        self.main = main
        self._db_path = path
        self.db = None
        self.logger = class_logger(__name__, self.__class__.__name__)
        self.ledger = Ledger if main.coin.NET == 'mainnet' else RegTestLedger
        self._fts_synced = False
        self.state_manager = None
        self.blocked_streams = None
        self.blocked_channels = None
        self.blocking_channel_hashes = {
            unhexlify(channel_id)[::-1] for channel_id in blocking_channels if channel_id
        }
        self.filtered_streams = None
        self.filtered_channels = None
        self.filtering_channel_hashes = {
            unhexlify(channel_id)[::-1] for channel_id in filtering_channels if channel_id
        }
        self.trending = trending

    def open(self):
        self.db = apsw.Connection(
            self._db_path,
            flags=(
                apsw.SQLITE_OPEN_READWRITE |
                apsw.SQLITE_OPEN_CREATE |
                apsw.SQLITE_OPEN_URI
            )
        )

        def exec_factory(cursor, statement, bindings):
            tpl = namedtuple('row', (d[0] for d in cursor.getdescription()))
            cursor.setrowtrace(lambda cursor, row: tpl(*row))
            return True

        self.db.setexectrace(exec_factory)
        self.execute(self.PRAGMAS)
        self.execute(self.CREATE_TABLES_QUERY)
        register_canonical_functions(self.db)
        self.state_manager = Manager()
        self.blocked_streams = self.state_manager.dict()
        self.blocked_channels = self.state_manager.dict()
        self.filtered_streams = self.state_manager.dict()
        self.filtered_channels = self.state_manager.dict()
        self.update_blocked_and_filtered_claims()
        for algorithm in self.trending:
            algorithm.install(self.db)

    def close(self):
        if self.db is not None:
            self.db.close()
        if self.state_manager is not None:
            self.state_manager.shutdown()

    def update_blocked_and_filtered_claims(self):
        self.update_claims_from_channel_hashes(
            self.blocked_streams, self.blocked_channels, self.blocking_channel_hashes)
        self.update_claims_from_channel_hashes(
            self.filtered_streams, self.filtered_channels, self.filtering_channel_hashes)
        self.filtered_streams.update(self.blocked_streams)
        self.filtered_channels.update(self.blocked_channels)

    def update_claims_from_channel_hashes(self, shared_streams, shared_channels, channel_hashes):
        streams, channels = {}, {}
        if channel_hashes:
            sql = query(
                "SELECT repost.channel_hash, repost.reposted_claim_hash, target.claim_type "
                "FROM claim as repost JOIN claim AS target ON (target.claim_hash=repost.reposted_claim_hash)", **{
                    'repost.reposted_claim_hash__is_not_null': 1,
                    'repost.channel_hash__in': channel_hashes
                }
            )
            for blocked_claim in self.execute(*sql):
                if blocked_claim.claim_type == CLAIM_TYPES['stream']:
                    streams[blocked_claim.reposted_claim_hash] = blocked_claim.channel_hash
                elif blocked_claim.claim_type == CLAIM_TYPES['channel']:
                    channels[blocked_claim.reposted_claim_hash] = blocked_claim.channel_hash
        shared_streams.clear()
        shared_streams.update(streams)
        shared_channels.clear()
        shared_channels.update(channels)

    @staticmethod
    def _insert_sql(table: str, data: dict) -> Tuple[str, list]:
        columns, values = [], []
        for column, value in data.items():
            columns.append(column)
            values.append(value)
        sql = (
            f"INSERT INTO {table} ({', '.join(columns)}) "
            f"VALUES ({', '.join(['?'] * len(values))})"
        )
        return sql, values

    @staticmethod
    def _update_sql(table: str, data: dict, where: str, constraints: Union[list, tuple]) -> Tuple[str, list]:
        columns, values = [], []
        for column, value in data.items():
            columns.append(f"{column} = ?")
            values.append(value)
        values.extend(constraints)
        return f"UPDATE {table} SET {', '.join(columns)} WHERE {where}", values

    @staticmethod
    def _delete_sql(table: str, constraints: dict) -> Tuple[str, dict]:
        where, values = constraints_to_sql(constraints)
        return f"DELETE FROM {table} WHERE {where}", values

    def execute(self, *args):
        return self.db.cursor().execute(*args)

    def executemany(self, *args):
        return self.db.cursor().executemany(*args)

    def begin(self):
        self.execute('begin;')

    def commit(self):
        self.execute('commit;')

    def _upsertable_claims(self, txos: List[Output], header, clear_first=False):
        claim_hashes, claims, tags = set(), [], {}
        for txo in txos:
            tx = txo.tx_ref.tx

            try:
                assert txo.claim_name
                assert txo.normalized_name
            except Exception:
                # self.logger.exception(f"Could not decode claim name for {tx.id}:{txo.position}.")
                continue

            claim_hash = txo.claim_hash
            claim_hashes.add(claim_hash)
            claim_record = {
                'claim_hash': claim_hash,
                'claim_id': txo.claim_id,
                'claim_name': txo.claim_name,
                'normalized': txo.normalized_name,
                'txo_hash': txo.ref.hash,
                'tx_position': tx.position,
                'amount': txo.amount,
                'timestamp': header['timestamp'],
                'height': tx.height,
                'title': None,
                'description': None,
                'author': None,
                'duration': None,
                'claim_type': None,
                'stream_type': None,
                'media_type': None,
                'release_time': None,
                'fee_currency': None,
                'fee_amount': 0,
                'reposted_claim_hash': None
            }
            claims.append(claim_record)

            try:
                claim = txo.claim
            except Exception:
                # self.logger.exception(f"Could not parse claim protobuf for {tx.id}:{txo.position}.")
                continue

            if claim.is_stream:
                claim_record['claim_type'] = CLAIM_TYPES['stream']
                claim_record['media_type'] = claim.stream.source.media_type
                claim_record['stream_type'] = STREAM_TYPES[guess_stream_type(claim_record['media_type'])]
                claim_record['title'] = claim.stream.title
                claim_record['description'] = claim.stream.description
                claim_record['author'] = claim.stream.author
                if claim.stream.video and claim.stream.video.duration:
                    claim_record['duration'] = claim.stream.video.duration
                if claim.stream.audio and claim.stream.audio.duration:
                    claim_record['duration'] = claim.stream.audio.duration
                if claim.stream.release_time:
                    claim_record['release_time'] = claim.stream.release_time
                if claim.stream.has_fee:
                    fee = claim.stream.fee
                    if isinstance(fee.currency, str):
                        claim_record['fee_currency'] = fee.currency.lower()
                    if isinstance(fee.amount, Decimal):
                        claim_record['fee_amount'] = int(fee.amount * 1000)
            elif claim.is_repost:
                claim_record['claim_type'] = CLAIM_TYPES['repost']
                claim_record['reposted_claim_hash'] = claim.repost.reference.claim_hash
            elif claim.is_channel:
                claim_record['claim_type'] = CLAIM_TYPES['channel']

            for tag in clean_tags(claim.message.tags):
                tags[(tag, claim_hash)] = (tag, claim_hash, tx.height)

        if clear_first:
            self._clear_claim_metadata(claim_hashes)

        if tags:
            self.executemany(
                "INSERT OR IGNORE INTO tag (tag, claim_hash, height) VALUES (?, ?, ?)", tags.values()
            )

        return claims

    def insert_claims(self, txos: List[Output], header):
        claims = self._upsertable_claims(txos, header)
        if claims:
            self.executemany("""
                INSERT OR IGNORE INTO claim (
                    claim_hash, claim_id, claim_name, normalized, txo_hash, tx_position, amount,
                    claim_type, media_type, stream_type, timestamp, creation_timestamp,
                    fee_currency, fee_amount, title, description, author, duration, height,
                    reposted_claim_hash, creation_height, release_time, activation_height,
                    expiration_height, short_url)
                VALUES (
                    :claim_hash, :claim_id, :claim_name, :normalized, :txo_hash, :tx_position, :amount,
                    :claim_type, :media_type, :stream_type, :timestamp, :timestamp,
                    :fee_currency, :fee_amount, :title, :description, :author, :duration, :height,
                    :reposted_claim_hash, :height,
                    CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE :timestamp END,
                    CASE WHEN :normalized NOT IN (SELECT normalized FROM claimtrie) THEN :height END,
                    CASE WHEN :height >= 137181 THEN :height+2102400 ELSE :height+262974 END,
                    :claim_name||COALESCE(
                        (SELECT shortest_id(claim_id, :claim_id) FROM claim WHERE normalized = :normalized),
                        '#'||substr(:claim_id, 1, 1)
                    )
                )""", claims)

    def update_claims(self, txos: List[Output], header):
        claims = self._upsertable_claims(txos, header, clear_first=True)
        if claims:
            self.executemany("""
                UPDATE claim SET
                    txo_hash=:txo_hash, tx_position=:tx_position, amount=:amount, height=:height,
                    claim_type=:claim_type, media_type=:media_type, stream_type=:stream_type,
                    timestamp=:timestamp, fee_amount=:fee_amount, fee_currency=:fee_currency,
                    title=:title, duration=:duration, description=:description, author=:author,
                    reposted_claim_hash=:reposted_claim_hash,
                    release_time=CASE WHEN :release_time IS NOT NULL THEN :release_time ELSE release_time END
                WHERE claim_hash=:claim_hash;
                """, claims)

    def delete_claims(self, claim_hashes: Set[bytes]):
        """ Deletes a claim and its supports, and removes it from the claimtrie, in case of an abandon. """
        if claim_hashes:
            affected_channels = self.execute(*query(
                "SELECT channel_hash FROM claim",
                channel_hash__is_not_null=1, claim_hash__in=claim_hashes
            )).fetchall()
            for table in ('claim', 'support', 'claimtrie'):
                self.execute(*self._delete_sql(table, {'claim_hash__in': claim_hashes}))
            self._clear_claim_metadata(claim_hashes)
            return {r.channel_hash for r in affected_channels}
        return set()

    def delete_claims_above_height(self, height: int):
        claim_hashes = [x[0] for x in self.execute(
            "SELECT claim_hash FROM claim WHERE height>?", (height,)
        ).fetchall()]
        while claim_hashes:
            batch = set(claim_hashes[:500])
            claim_hashes = claim_hashes[500:]
            self.delete_claims(batch)

    def _clear_claim_metadata(self, claim_hashes: Set[bytes]):
        if claim_hashes:
            for table in ('tag',):  # 'language', 'location', etc
                self.execute(*self._delete_sql(table, {'claim_hash__in': claim_hashes}))

    def split_inputs_into_claims_supports_and_other(self, txis):
        txo_hashes = {txi.txo_ref.hash for txi in txis}
        claims = self.execute(*query(
            "SELECT txo_hash, claim_hash, normalized FROM claim", txo_hash__in=txo_hashes
        )).fetchall()
        txo_hashes -= {r.txo_hash for r in claims}
        supports = {}
        if txo_hashes:
            supports = self.execute(*query(
                "SELECT txo_hash, claim_hash FROM support", txo_hash__in=txo_hashes
            )).fetchall()
            txo_hashes -= {r.txo_hash for r in supports}
        return claims, supports, txo_hashes

    def insert_supports(self, txos: List[Output]):
        supports = []
        for txo in txos:
            tx = txo.tx_ref.tx
            supports.append((txo.ref.hash, tx.position, tx.height, txo.claim_hash, txo.amount))
        if supports:
            self.executemany(
                "INSERT OR IGNORE INTO support (txo_hash, tx_position, height, claim_hash, amount) "
                "VALUES (?, ?, ?, ?, ?)", supports
            )

    def delete_supports(self, txo_hashes: Set[bytes]):
        if txo_hashes:
            self.execute(*self._delete_sql('support', {'txo_hash__in': txo_hashes}))

    def calculate_reposts(self, txos: List[Output]):
        targets = set()
        for txo in txos:
            try:
                claim = txo.claim
            except Exception:
                continue
            if claim.is_repost:
                targets.add((claim.repost.reference.claim_hash,))
        if targets:
            self.executemany(
                """
                UPDATE claim SET reposted = (
                    SELECT count(*) FROM claim AS repost WHERE repost.reposted_claim_hash = claim.claim_hash
                )
                WHERE claim_hash = ?
                """, targets)

    def validate_channel_signatures(self, height, new_claims, updated_claims, spent_claims, affected_channels, timer):
        if not new_claims and not updated_claims and not spent_claims:
            return

        sub_timer = timer.add_timer('segregate channels and signables')
        sub_timer.start()
        channels, new_channel_keys, signables = {}, {}, {}
        for txo in chain(new_claims, updated_claims):
            try:
                claim = txo.claim
            except Exception:
                continue
            if claim.is_channel:
                channels[txo.claim_hash] = txo
                new_channel_keys[txo.claim_hash] = claim.channel.public_key_bytes
            else:
                signables[txo.claim_hash] = txo
        sub_timer.stop()

        sub_timer = timer.add_timer('make list of channels we need to lookup')
        sub_timer.start()
        missing_channel_keys = set()
        for txo in signables.values():
            claim = txo.claim
            if claim.is_signed and claim.signing_channel_hash not in new_channel_keys:
                missing_channel_keys.add(claim.signing_channel_hash)
        sub_timer.stop()

        sub_timer = timer.add_timer('lookup missing channels')
        sub_timer.start()
        all_channel_keys = {}
        if new_channel_keys or missing_channel_keys or affected_channels:
            all_channel_keys = dict(self.execute(*query(
                "SELECT claim_hash, public_key_bytes FROM claim",
                claim_hash__in=set(new_channel_keys) | missing_channel_keys | affected_channels
            )))
        sub_timer.stop()

        sub_timer = timer.add_timer('prepare for updating claims')
        sub_timer.start()
        changed_channel_keys = {}
        for claim_hash, new_key in new_channel_keys.items():
            if claim_hash not in all_channel_keys or all_channel_keys[claim_hash] != new_key:
                all_channel_keys[claim_hash] = new_key
                changed_channel_keys[claim_hash] = new_key

        claim_updates = []
        for claim_hash, txo in signables.items():
            claim = txo.claim
            update = {
                'claim_hash': claim_hash,
                'channel_hash': None,
                'signature': None,
                'signature_digest': None,
                'signature_valid': None
            }
            if claim.is_signed:
                update.update({
                    'channel_hash': claim.signing_channel_hash,
                    'signature': txo.get_encoded_signature(),
                    'signature_digest': txo.get_signature_digest(self.ledger),
                    'signature_valid': 0
                })
            claim_updates.append(update)
        sub_timer.stop()

        sub_timer = timer.add_timer('find claims affected by a change in channel key')
        sub_timer.start()
        if changed_channel_keys:
            sql = f"""
                SELECT * FROM claim WHERE
                    channel_hash IN ({','.join('?' for _ in changed_channel_keys)}) AND
                    signature IS NOT NULL
            """
            for affected_claim in self.execute(sql, changed_channel_keys.keys()):
                if affected_claim.claim_hash not in signables:
                    claim_updates.append({
                        'claim_hash': affected_claim.claim_hash,
                        'channel_hash': affected_claim.channel_hash,
                        'signature': affected_claim.signature,
                        'signature_digest': affected_claim.signature_digest,
                        'signature_valid': 0
                    })
        sub_timer.stop()

        sub_timer = timer.add_timer('verify signatures')
        sub_timer.start()
        for update in claim_updates:
            channel_pub_key = all_channel_keys.get(update['channel_hash'])
            if channel_pub_key and update['signature']:
                update['signature_valid'] = Output.is_signature_valid(
                    bytes(update['signature']), bytes(update['signature_digest']), channel_pub_key)
        sub_timer.stop()

        sub_timer = timer.add_timer('update claims')
        sub_timer.start()
        if claim_updates:
            self.executemany(f"""
                UPDATE claim SET
                    channel_hash=:channel_hash, signature=:signature, signature_digest=:signature_digest,
                    signature_valid=:signature_valid,
                    channel_join=CASE
                        WHEN signature_valid=1 AND :signature_valid=1 AND channel_hash=:channel_hash THEN channel_join
                        WHEN :signature_valid=1 THEN {height}
                    END,
                    canonical_url=CASE
                        WHEN signature_valid=1 AND :signature_valid=1 AND channel_hash=:channel_hash THEN canonical_url
                        WHEN :signature_valid=1 THEN
                            (SELECT short_url FROM claim WHERE claim_hash=:channel_hash)||'/'||
                            claim_name||COALESCE(
                                (SELECT shortest_id(other_claim.claim_id, claim.claim_id) FROM claim AS other_claim
                                 WHERE other_claim.signature_valid = 1 AND
                                       other_claim.channel_hash = :channel_hash AND
                                       other_claim.normalized = claim.normalized),
                                '#'||substr(claim_id, 1, 1)
                            )
                    END
                WHERE claim_hash=:claim_hash;
                """, claim_updates)
        sub_timer.stop()

        sub_timer = timer.add_timer('update claims affected by spent channels')
        sub_timer.start()
        if spent_claims:
            self.execute(f"""
                UPDATE claim SET
                    signature_valid=CASE WHEN signature IS NOT NULL THEN 0 END,
                    channel_join=NULL, canonical_url=NULL
                WHERE channel_hash IN ({','.join('?' for _ in spent_claims)})
                """, spent_claims)
        sub_timer.stop()

        sub_timer = timer.add_timer('update channels')
        sub_timer.start()
        if channels:
            self.executemany("""
                UPDATE claim SET
                    public_key_bytes=:public_key_bytes,
                    public_key_hash=:public_key_hash
                WHERE claim_hash=:claim_hash""", [{
                    'claim_hash': claim_hash,
                    'public_key_bytes': txo.claim.channel.public_key_bytes,
                    'public_key_hash': self.ledger.address_to_hash160(
                        self.ledger.public_key_to_address(txo.claim.channel.public_key_bytes))
                } for claim_hash, txo in channels.items()])
        sub_timer.stop()

        sub_timer = timer.add_timer('update claims_in_channel counts')
        sub_timer.start()
        if all_channel_keys:
            self.executemany(f"""
                UPDATE claim SET
                    claims_in_channel=(
                        SELECT COUNT(*) FROM claim AS claim_in_channel
                        WHERE claim_in_channel.signature_valid=1 AND
                              claim_in_channel.channel_hash=claim.claim_hash
                    )
                WHERE claim_hash = ?
                """, [(channel_hash,) for channel_hash in all_channel_keys.keys()])
        sub_timer.stop()

        sub_timer = timer.add_timer('update blocked claims list')
        sub_timer.start()
        if (self.blocking_channel_hashes.intersection(all_channel_keys) or
                self.filtering_channel_hashes.intersection(all_channel_keys)):
            self.update_blocked_and_filtered_claims()
        sub_timer.stop()

    def _update_support_amount(self, claim_hashes):
        if claim_hashes:
            self.execute(f"""
                UPDATE claim SET
                    support_amount = COALESCE(
                        (SELECT SUM(amount) FROM support WHERE support.claim_hash=claim.claim_hash), 0
                    )
                WHERE claim_hash IN ({','.join('?' for _ in claim_hashes)})
                """, claim_hashes)

    def _update_effective_amount(self, height, claim_hashes=None):
        self.execute(
            f"UPDATE claim SET effective_amount = amount + support_amount "
            f"WHERE activation_height = {height}")
        if claim_hashes:
            self.execute(
                f"UPDATE claim SET effective_amount = amount + support_amount "
                f"WHERE activation_height < {height} "
                f"  AND claim_hash IN ({','.join('?' for _ in claim_hashes)})",
                claim_hashes)

    def _calculate_activation_height(self, height):
        last_take_over_height = f"""COALESCE(
            (SELECT last_take_over_height FROM claimtrie
             WHERE claimtrie.normalized=claim.normalized),
            {height}
        )"""
        self.execute(f"""
            UPDATE claim SET activation_height =
                {height} + min(4032, cast(({height} - {last_take_over_height}) / 32 AS INT))
            WHERE activation_height IS NULL
            """)

    def _perform_overtake(self, height, changed_claim_hashes, deleted_names):
        deleted_names_sql = claim_hashes_sql = ""
        if changed_claim_hashes:
            claim_hashes_sql = f"OR claim_hash IN ({','.join('?' for _ in changed_claim_hashes)})"
        if deleted_names:
            deleted_names_sql = f"OR normalized IN ({','.join('?' for _ in deleted_names)})"
        overtakes = self.execute(f"""
            SELECT winner.normalized, winner.claim_hash,
                   claimtrie.claim_hash AS current_winner,
                   MAX(winner.effective_amount) AS max_winner_effective_amount
            FROM (
                SELECT normalized, claim_hash, effective_amount FROM claim
                WHERE normalized IN (
                    SELECT normalized FROM claim WHERE activation_height={height} {claim_hashes_sql}
                ) {deleted_names_sql}
                ORDER BY effective_amount DESC, height ASC, tx_position ASC
            ) AS winner LEFT JOIN claimtrie USING (normalized)
            GROUP BY winner.normalized
            HAVING current_winner IS NULL OR current_winner <> winner.claim_hash
            """, list(changed_claim_hashes) + deleted_names)
        for overtake in overtakes:
            if overtake.current_winner:
                self.execute(
                    f"UPDATE claimtrie SET claim_hash = ?, last_take_over_height = {height} "
                    f"WHERE normalized = ?",
                    (overtake.claim_hash, overtake.normalized))
            else:
                self.execute(
                    f"INSERT INTO claimtrie (claim_hash, normalized, last_take_over_height) "
                    f"VALUES (?, ?, {height})",
                    (overtake.claim_hash, overtake.normalized))
            self.execute(
                f"UPDATE claim SET activation_height = {height} WHERE normalized = ? "
                f"AND (activation_height IS NULL OR activation_height > {height})",
                (overtake.normalized,))

    def _copy(self, height):
        if height > 50:
            self.execute(f"DROP TABLE claimtrie{height-50}")
        self.execute(f"CREATE TABLE claimtrie{height} AS SELECT * FROM claimtrie")

    def update_claimtrie(self, height, changed_claim_hashes, deleted_names, timer):
        r = timer.run
        r(self._calculate_activation_height, height)
        r(self._update_support_amount, changed_claim_hashes)
        r(self._update_effective_amount, height, changed_claim_hashes)
        r(self._perform_overtake, height, changed_claim_hashes, list(deleted_names))
        r(self._update_effective_amount, height)
        r(self._perform_overtake, height, [], [])

    def get_expiring(self, height):
        return self.execute(
            f"SELECT claim_hash, normalized FROM claim WHERE expiration_height = {height}"
        )

    def advance_txs(self, height, all_txs, header, daemon_height, timer):
        insert_claims = []
        update_claims = []
        update_claim_hashes = set()
        delete_claim_hashes = set()
        insert_supports = []
        delete_support_txo_hashes = set()
        recalculate_claim_hashes = set()  # added/deleted supports, added/updated claim
        deleted_claim_names = set()
        delete_others = set()
        body_timer = timer.add_timer('body')
        for position, (etx, txid) in enumerate(all_txs):
            tx = timer.run(Transaction, etx.raw, height=height, position=position)
            # Inputs
            spent_claims, spent_supports, spent_others = timer.run(
                self.split_inputs_into_claims_supports_and_other, tx.inputs)
            body_timer.start()
            delete_claim_hashes.update({r.claim_hash for r in spent_claims})
            deleted_claim_names.update({r.normalized for r in spent_claims})
            delete_support_txo_hashes.update({r.txo_hash for r in spent_supports})
            recalculate_claim_hashes.update({r.claim_hash for r in spent_supports})
            delete_others.update(spent_others)
            # Outputs
            for output in tx.outputs:
                if output.is_support:
                    insert_supports.append(output)
                    recalculate_claim_hashes.add(output.claim_hash)
                elif output.script.is_claim_name:
                    insert_claims.append(output)
                    recalculate_claim_hashes.add(output.claim_hash)
                elif output.script.is_update_claim:
                    claim_hash = output.claim_hash
                    update_claims.append(output)
                    recalculate_claim_hashes.add(claim_hash)
            body_timer.stop()

        skip_update_claim_timer = timer.add_timer('skip update of abandoned claims')
        skip_update_claim_timer.start()
        for updated_claim in list(update_claims):
            if updated_claim.ref.hash in delete_others:
                update_claims.remove(updated_claim)
        for updated_claim in update_claims:
            claim_hash = updated_claim.claim_hash
            delete_claim_hashes.discard(claim_hash)
            update_claim_hashes.add(claim_hash)
        skip_update_claim_timer.stop()

        skip_insert_claim_timer = timer.add_timer('skip insertion of abandoned claims')
        skip_insert_claim_timer.start()
        for new_claim in list(insert_claims):
            if new_claim.ref.hash in delete_others:
                if new_claim.claim_hash not in update_claim_hashes:
                    insert_claims.remove(new_claim)
        skip_insert_claim_timer.stop()

        skip_insert_support_timer = timer.add_timer('skip insertion of abandoned supports')
        skip_insert_support_timer.start()
        for new_support in list(insert_supports):
            if new_support.ref.hash in delete_others:
                insert_supports.remove(new_support)
        skip_insert_support_timer.stop()

        expire_timer = timer.add_timer('recording expired claims')
        expire_timer.start()
        for expired in self.get_expiring(height):
            delete_claim_hashes.add(expired.claim_hash)
            deleted_claim_names.add(expired.normalized)
        expire_timer.stop()

        r = timer.run
        r(update_full_text_search, 'before-delete',
          delete_claim_hashes, self.db.cursor(), self.main.first_sync)
        affected_channels = r(self.delete_claims, delete_claim_hashes)
        r(self.delete_supports, delete_support_txo_hashes)
        r(self.insert_claims, insert_claims, header)
        r(self.calculate_reposts, insert_claims)
        r(update_full_text_search, 'after-insert',
          [txo.claim_hash for txo in insert_claims], self.db.cursor(), self.main.first_sync)
        r(update_full_text_search, 'before-update',
          [txo.claim_hash for txo in update_claims], self.db.cursor(), self.main.first_sync)
        r(self.update_claims, update_claims, header)
        r(update_full_text_search, 'after-update',
          [txo.claim_hash for txo in update_claims], self.db.cursor(), self.main.first_sync)
        r(self.validate_channel_signatures, height, insert_claims,
          update_claims, delete_claim_hashes, affected_channels, forward_timer=True)
        r(self.insert_supports, insert_supports)
        r(self.update_claimtrie, height, recalculate_claim_hashes, deleted_claim_names, forward_timer=True)
        for algorithm in self.trending:
            r(algorithm.run, self.db.cursor(), height, daemon_height, recalculate_claim_hashes)
        if not self._fts_synced and self.main.first_sync and height == daemon_height:
            r(first_sync_finished, self.db.cursor())
            self._fts_synced = True
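
# The blocked/filtered maps created in open() above are plain multiprocessing.Manager().dict()
# proxies, so writer-side updates become visible to other processes that hold the same proxy.
# Below is a minimal, self-contained sketch of that sharing pattern on its own; the names
# `reader` and the example keys are illustrative and not part of the class above.
from multiprocessing import Manager, Process
import time


def reader(shared_filtered):
    # Each access goes through the manager process, so the reader always sees current contents.
    for _ in range(3):
        print('reader sees:', dict(shared_filtered))
        time.sleep(0.5)


if __name__ == '__main__':
    manager = Manager()
    filtered_streams = manager.dict()                  # analogous to SQLDB.filtered_streams
    p = Process(target=reader, args=(filtered_streams,))
    p.start()
    filtered_streams[b'claim-hash'] = b'channel-hash'  # writer-side update, visible to the reader
    p.join()
    manager.shutdown()                                 # analogous to SQLDB.close()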
class DataManager:
    """
    Class to interact with the data visualizer.

    @author Frederic Abraham
    """

    def __init__(self, display_data: dict, parallel: bool = False, visualize: bool = True):
        self.display_data = display_data
        data_names = [
            display_name['display_name']
            for display_name in list(filter(lambda ele: ele['graph'], display_data.values()))
        ]

        self.parallel = parallel
        self.visualize = visualize

        if self.visualize:
            if self.parallel:
                self.manager = Manager()
                self.done = self.manager.Value("done", True)
                self.time_step = self.manager.Value("timestep", 0)
                self.line_dict = self.manager.dict({data_name: 0 for data_name in data_names})
                self.p = Process(target=run, args=(self.done, self.time_step, self.line_dict,))
                self.p.start()
            else:
                plt.tight_layout()
                plt.ion()
                plt.show()

        self.time_steps = []
        self.data = {data_name: [] for data_name in display_data.keys()}
        self.colors = plt.get_cmap('plasma')(np.linspace(0, 0.8, len(self.data)))

    def update_time_step(self, new_time_step):
        self.time_steps.append(new_time_step)
        self.display_data['generation']['value'] = new_time_step
        if self.visualize:
            if self.parallel:
                self.time_step.value = new_time_step

    def update_value(self, key, value):
        self.data[key].append(value)
        self.display_data[key]['value'] = value
        if self.visualize:
            if self.parallel and self.display_data[key]['graph']:
                self.line_dict[self.display_data[key]['display_name']] = value

    def get_data(self, key):
        return self.data[key]

    def update(self):
        if not self.parallel and self.visualize:
            animate(
                self.time_steps,
                dict(filter(lambda ele: self.display_data[ele[0]]['graph'], self.data.items())),
                self.colors
            )

    def stop(self):
        if self.visualize and self.parallel:
            self.done.value = False
            self.p.join()
            self.p.close()
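
# A minimal usage sketch for DataManager, assuming the surrounding module provides the `run`
# and `animate` helpers it calls. The display_data keys below ('generation', 'fitness') and
# their fields are illustrative, inferred from how the class reads them.
display_data = {
    'generation': {'display_name': 'Generation', 'graph': False, 'value': 0},
    'fitness':    {'display_name': 'Avg Fitness', 'graph': True,  'value': 0},
}
dm = DataManager(display_data, parallel=True, visualize=True)
for generation in range(10):
    dm.update_time_step(generation)
    dm.update_value('fitness', generation * 0.1)  # placeholder metric
    dm.update()                                   # no-op in parallel mode; animates otherwise
dm.stop()                                         # signals the plotting process and joins it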
def main(argv):
    login = json.load(open(os.path.join(clientDir, 'config', 'login.json')))

    # initialize thread channels
    thcmds = {}
    thcmds['tgApi'] = Queue()
    thcmds['bot'] = Queue()

    ths = {}
    ths['tgApi'] = tgApi(
        cmdQueue=thcmds,
        token=login['telegram']['token'],
        botname=login['telegram']['botname'],
        authgroup=login['telegram']['authgroup'],
        itag="tgApi",
        otag="bot",
    )
    print(
        'telegram:',
        login['telegram']['token'],
        login['telegram']['botname'],
        login['telegram']['authgroup'],
    )

    # prepare and start threads
    for key, th in ths.items():
        th.daemon = True
        th.start()

    # initialize process channels
    manager = Manager()
    smpCmds = {}
    rmpCmds = {}
    impReqs = {}
    mps = {}
    is_on = {}
    is_auto = {}
    thcmds['tgApi'].put('client starts')
    pkey = 'bot'

    # socket
    HOST = login['server']['host']
    PORT = login['server']['port']
    print('MT Communication @', HOST, PORT)

    sel = selectors.DefaultSelector()
    lsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    lsock.bind((HOST, PORT))
    lsock.listen()
    print('listening on', (HOST, PORT))
    lsock.setblocking(False)
    sel.register(lsock, selectors.EVENT_READ, data=None)

    while True:
        events = sel.select(timeout=None)
        for key, mask in events:
            if key.data is None:
                # accept_wrapper(key.fileobj)
                sock = key.fileobj
                conn, addr = sock.accept()  # should be ready to read
                print('accepted connection from', addr)
                conn.setblocking(False)
                data = types.SimpleNamespace(addr=addr, inb=b'', outb=b'')
                conn_events = selectors.EVENT_READ | selectors.EVENT_WRITE
                sel.register(conn, conn_events, data=data)
            else:
                # service_connection(key, mask)
                sock = key.fileobj
                data = key.data
                if mask & selectors.EVENT_READ:
                    recv_data = sock.recv(1024)  # should be ready to read
                    if recv_data:
                        data.outb += recv_data
                    else:
                        print('closing connection to', data.addr)
                        sel.unregister(sock)
                        sock.close()
                if mask & selectors.EVENT_WRITE:
                    if data.outb:
                        try:
                            thcmds['tgApi'].put(data.outb.decode())
                        except Exception as e:
                            print(e)
                        print('echoing', repr(data.outb), 'to', data.addr)
                        sent = sock.send(data.outb)  # should be ready to write
                        data.outb = data.outb[sent:]
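
# A small client sketch to exercise the selector loop above: it connects to the same host/port
# that main() reads from login.json and sends one payload, which the loop echoes back and also
# forwards to the tgApi queue. The host and port values here are placeholders for illustration.
import socket


def send_to_client(message: bytes, host: str = '127.0.0.1', port: int = 9009) -> bytes:
    with socket.create_connection((host, port)) as sock:
        sock.sendall(message)
        return sock.recv(1024)  # the echoed payload


if __name__ == '__main__':
    print(send_to_client(b'hello from MT'))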