def extract_features(queue, lsh, child_conn, n_sigs):
    fe = FeatureExtractor()
    relationships = []
    count = 0
    while True:
        try:
            line = queue.get_nowait()
            count += 1
            if count % 1000 == 0:
                print count, " processed, remaining ", queue.qsize()
            rel_id, rel_type, e1, e2, sentence = line.split('\t')
            rel_id = int(rel_id.split(":")[1])
            shingles = fe.process_index(sentence, e1, e2)
            try:
                shingles = shingles.getvalue().strip().split(' ')
            except AttributeError, e:
                print line
                print shingles
                sys.exit(-1)
            sigs = MinHash.signature(shingles, n_sigs)
            lsh.index(rel_type, rel_id, sigs)
            relationships.append((rel_type, rel_id, sigs, shingles))
        except Queue.Empty:
            print multiprocessing.current_process(), "Queue is Empty"
            child_conn.send(relationships)
            break
def RebuildProxy(func, token, serializer, kwds):
    server = getattr(current_process(), '_manager_server', None)
    if server and server.address == token.address:
        return server.id_to_obj[token.id][0]
    else:
        incref = kwds.pop('incref', True) and not getattr(current_process(), '_inheriting', False)
        return func(token, serializer, incref=incref, **kwds)
def getTaskProcess(self):
    while True:
        # if self.taskleft() > 0:
        #     try:
        #         req = self.q_request.get(block=True, timeout=10000)
        #     except:
        #         continue
        # else:
        #     threadname = multiprocessing.current_process().name
        #     print threadname + ' shutting down'
        #     with self.lock:
        #         self.alivenum -= 1
        #     break
        req = self.q_request.get()
        with self.lock:  # keep this update atomic; entering the critical section
            self.running = self.running + 1
        threadname = multiprocessing.current_process().name
        print 'Process ' + threadname + ' issuing request: '
        ans = self.do_job(self.job, req, threadname)
        # ans = self.connectpool.getConnect(req)
        # self.lock.release()
        if self.needfinishqueue > 0:
            self.q_finish.put((req, ans))
        # self.lock.acquire()
        with self.lock:
            self.running = self.running - 1
        threadname = multiprocessing.current_process().name
        print 'Process ' + threadname + ' finished request'
def choice_set_worker(work_queue,done_queue,network,trip_data,master_config,trip_times,ext_bound): this_network=network #initialize link randomizer link_randomizer=None if master_config.choice_set_config['method']=='doubly_stochastic' and not master_config.choice_set_config['randomize_after']: link_randomizer=master_config.choice_set_config.get_link_randomizer(this_network,master_config) if '1' in current_process().name: for var in link_randomizer['variables']: print var," zero p: ",link_randomizer['zero']['probs'][var] print var," posi m: ",link_randomizer['pos']['means'][var] if master_config.choice_set_config['method']=='doubly_stochastic' and master_config.choice_set_config['randomize_after']: link_randomizer=master_config.choice_set_config['randomize_after_dev'] idx=0 for trip_id in iter(work_queue.get,'STOP'): idx=idx+1 print time.asctime(time.localtime()), "-", current_process().name, "-",idx, ". trip_id: ", trip_id[0], ", sub_trip: ", trip_id[1], ", stage: ", trip_id[2] the_set,chosen_overlap=generate_choice_set(this_network,trip_data[trip_id],master_config.choice_set_config,link_randomizer,master_config['time_dependent_relation'],trip_times[trip_id[0]],ext_bound) done_queue.put((trip_id[0],the_set,chosen_overlap)) done_queue.put('STOP') return True
def scheduler(list_key=Conf.Q_LIST): """ Creates a task from a schedule at the scheduled time and schedules next run """ for s in Schedule.objects.exclude(repeats=0).filter(next_run__lt=timezone.now()): args = () kwargs = {} # get args, kwargs and hook if s.kwargs: try: # eval should be safe here cause dict() kwargs = eval('dict({})'.format(s.kwargs)) except SyntaxError: kwargs = {} if s.args: args = ast.literal_eval(s.args) # single value won't eval to tuple, so: if type(args) != tuple: args = (args,) q_options = kwargs.get('q_options', {}) if s.hook: q_options['hook'] = s.hook # set up the next run time if not s.schedule_type == s.ONCE: next_run = arrow.get(s.next_run) if s.schedule_type == s.HOURLY: next_run = next_run.replace(hours=+1) elif s.schedule_type == s.DAILY: next_run = next_run.replace(days=+1) elif s.schedule_type == s.WEEKLY: next_run = next_run.replace(weeks=+1) elif s.schedule_type == s.MONTHLY: next_run = next_run.replace(months=+1) elif s.schedule_type == s.QUARTERLY: next_run = next_run.replace(months=+3) elif s.schedule_type == s.YEARLY: next_run = next_run.replace(years=+1) s.next_run = next_run.datetime s.repeats += -1 # send it to the cluster q_options['list_key'] = list_key q_options['group'] = s.name or s.id kwargs['q_options'] = q_options s.task = tasks.async(s.func, *args, **kwargs) # log it if not s.task: logger.error( _('{} failed to create a task from schedule [{}]').format(current_process().name, s.name or s.id)) else: logger.info( _('{} created a task from schedule [{}]').format(current_process().name, s.name or s.id)) # default behavior is to delete a ONCE schedule if s.schedule_type == s.ONCE: if s.repeats < 0: s.delete() return # but not if it has a positive repeats s.repeats = 0 # save the schedule s.save()
def generate_instances(self, sentences, child_conn): # Each process has its own NLTK PoS-tagger tagger = load('taggers/maxent_treebank_pos_tagger/english.pickle') instances = list() while True: try: s = sentences.get_nowait() if sentences.qsize() % 500 == 0: print(multiprocessing.current_process(), \ "Instances to process", sentences.qsize()) sentence = Sentence(s, self.config.e1_type, self.config.e2_type, self.config.max_tokens_away, self.config.min_tokens_away, self.config.context_window_size, tagger, self.config) for rel in sentence.relationships: t = Tuple(rel.e1, rel.e2, rel.sentence, rel.before, rel.between, rel.after, self.config) instances.append(t) except queue.Empty: print(multiprocessing.current_process(), "Queue is Empty") pid = multiprocessing.current_process().pid child_conn.send((pid, instances)) break
def TestCustomLogging(l, n, s):
    for i in range(n):
        l.info("Info - {} - {}".format(multiprocessing.current_process().name,
                                       time.strftime("%d.%m.%Y %H:%M:%S", time.gmtime())))
        l.error("Error - {} - {}".format(multiprocessing.current_process().name,
                                         time.strftime("%d.%m.%Y %H:%M:%S", time.gmtime())))
        time.sleep(0.2)
    if not s.is_set():
        s.set()
def monitor(result_queue, broker=None):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    if not broker:
        broker = get_broker()
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    for task in iter(result_queue.get, 'STOP'):
        # save the result
        if task.get('cached', False):
            save_cached(task, broker)
        else:
            save_task(task, broker)
        # acknowledge and log the result
        if task['success']:
            # acknowledge
            ack_id = task.pop('ack_id', False)
            if ack_id:
                broker.acknowledge(ack_id)
            # log success
            logger.info(_("Processed [{}]").format(task['name']))
        else:
            # log failure
            logger.error(_("Failed [{}] - {}").format(task['name'], task['result']))
    logger.info(_("{} stopped monitoring results").format(name))
def cluster_tuples_parallel(self, patterns, matched_tuples, child_conn):
    updated_patterns = list(patterns)
    count = 0
    for t in matched_tuples:
        count += 1
        if count % 500 == 0:
            print(multiprocessing.current_process(), count, "tuples processed")

        # go through all patterns (clusters of tuples) and find the one with
        # the highest similarity score
        max_similarity = 0
        max_similarity_cluster_index = 0
        for i in range(0, len(updated_patterns)):
            extraction_pattern = updated_patterns[i]
            accept, score = self.similarity_all(t, extraction_pattern)
            if accept is True and score > max_similarity:
                max_similarity = score
                max_similarity_cluster_index = i

        # if max_similarity < min_degree_match create a new cluster
        if max_similarity < self.config.threshold_similarity:
            c = Pattern(t)
            updated_patterns.append(c)
        # if max_similarity >= min_degree_match add to the cluster with
        # the highest similarity
        else:
            updated_patterns[max_similarity_cluster_index].add_tuple(t)

    # keep only clusters with more than 5 tuples
    new_patterns = [p for p in updated_patterns if len(p.tuples) > 5]
    pid = multiprocessing.current_process().pid
    print(multiprocessing.current_process(), "Patterns: ", len(new_patterns))
    child_conn.send((pid, new_patterns))
def __repr__(self):
    "x.__repr__() <==> repr(x)"

    name = self.__class__.__name__

    if hasattr(self, "channel") and self.channel is not None:
        channel = "/%s" % self.channel
    else:
        channel = ""

    q = len(self._queue)
    c = len(self.channels)
    h = len(self._handlers)
    state = self.state

    if HAS_MULTIPROCESSING == 2:
        pid = current_process().ident
    elif HAS_MULTIPROCESSING == 1:
        pid = current_process().getPid()
    else:
        pid = os.getpid()

    if pid:
        id = "%s:%s" % (pid, current_thread().getName())
    else:
        id = current_thread().getName()

    format = "<%s%s %s (queued=%d, channels=%d, handlers=%d) [%s]>"
    return format % (name, channel, id, q, c, h, state)
def pusher(task_queue, event, broker=None):
    """
    Pulls tasks off the broker and puts them in the task queue
    :type task_queue: multiprocessing.Queue
    :type event: multiprocessing.Event
    """
    if not broker:
        broker = get_broker()
    logger.info(_('{} pushing tasks at {}').format(current_process().name, current_process().pid))
    while True:
        try:
            task_set = broker.dequeue()
        except Exception as e:
            logger.error(e)
            # broker probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task_set:
            for task in task_set:
                ack_id = task[0]
                # unpack the task
                try:
                    task = signing.SignedPackage.loads(task[1])
                except (TypeError, signing.BadSignature) as e:
                    logger.error(e)
                    broker.fail(ack_id)
                    continue
                task['ack_id'] = ack_id
                task_queue.put(task)
            logger.debug(_('queueing from {}').format(broker.list_key))
        if event.is_set():
            break
    logger.info(_("{} stopped pushing tasks").format(current_process().name))
def main(): # get data from parent over stdin data = pickle.load(sys.stdin) sys.stdin.close() # set some stuff _logger.setLevel(data['dist_log_level']) forking.prepare(data) # create server for a `HostManager` object server = managers.Server(HostManager._registry, ('', 0), data['authkey']) current_process()._server = server # report server address and number of cpus back to parent conn = connection.Client(data['parent_address'], authkey=data['authkey']) conn.send((data['index'], server.address, slot_count)) conn.close() # set name etc current_process().set_name('Host-%s:%s' % server.address) util._run_after_forkers() # register a cleanup function def cleanup(directory): debug('removing directory %s', directory) shutil.rmtree(directory) debug('shutting down host manager') util.Finalize(None, cleanup, args=[data['dir']], exitpriority=0) # start host manager debug('remote host manager starting in %s', data['dir']) server.serve_forever()
def monitor(result_queue, broker=None):
    """
    Gets finished tasks from the result queue and saves them to Django
    :type result_queue: multiprocessing.Queue
    """
    if not broker:
        broker = get_broker()
    name = current_process().name
    logger.info(_("{} monitoring at {}").format(name, current_process().pid))
    for task in iter(result_queue.get, "STOP"):
        # acknowledge
        ack_id = task.pop("ack_id", False)
        if ack_id:
            broker.acknowledge(ack_id)
        # save the result
        if task.get("cached", False):
            save_cached(task, broker)
        else:
            save_task(task)
        # log the result
        if task["success"]:
            logger.info(_("Processed [{}]").format(task["name"]))
        else:
            logger.error(_("Failed [{}] - {}").format(task["name"], task["result"]))
    logger.info(_("{} stopped monitoring results").format(name))
def dequeue(self):
    while not self.is_quit():
        t = None
        try:
            t = self.queue.get(True)
        except IOError:
            # Anticipate Ctrl-C
            #print("Quit W1: %s" % self.name)
            self.quit.value = 1
            break
        if isinstance(t, tuple):
            self.out_counter.increment()
            self.worker_out_counter.increment()
            topic = t[0]
            msg = t[1]
            ctime = t[2]
            if isinstance(msg, rospy.Message):
                doc = ros_datacentre.util.msg_to_document(msg)
                doc["__recorded"] = ctime or datetime.now()
                doc["__topic"] = topic
                try:
                    #print(self.sep + threading.current_thread().getName() + "@" + topic + ": ")
                    #pprint.pprint(doc)
                    self.collection.insert(doc)
                except InvalidDocument, e:
                    print("InvalidDocument " + current_process().name + "@" + topic + ": \n")
                    print e
                except InvalidStringData, e:
                    print("InvalidStringData " + current_process().name + "@" + topic + ": \n")
                    print e
def main(OBJECTID, lck, count, length, getArea=False): """ OBJECTID - the objectid of the feature from the wfs service lck - multiprocess lock count - how many features have been processed length - the total number of features to be processed getArea - boolean flag to indicate whether to capture the area of intersection """ try: logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s',) logging.info(str(os.getpid()) + " OBJECTID " + str(OBJECTID) + " (" + str(count) + " out of " + str(length) + ")") multiprocessing.current_process().cnt += 1 conn = dbconnect('species_especies_schema') # connect to PostGIS # intersect the species range features with the intersectingfeature features if getArea: # populate the area using the intersection area between the wdpa and the species conn.cur.execute("SELECT * from especies.intersect_species_wdpa_area(%s,false)" % OBJECTID) else: conn.cur.execute("SELECT * from especies.intersect_species_wdpa(%s,false)" % OBJECTID) intersectingfeatures = conn.cur.fetchall() # get all of the intersecting PAs for the species if len(intersectingfeatures) > 0: for intersectingfeature in intersectingfeatures: # iterate through the intersectingfeatures if getArea: # populate the output table conn.cur.execute("SELECT especies.insert_species_wdpa_area(%s,%s,%s,%s)" % (OBJECTID, intersectingfeature[1], intersectingfeature[2], intersectingfeature[3])) else: conn.cur.execute("SELECT especies.insert_species_wdpa(%s,%s,%s)" % (OBJECTID, intersectingfeature[1], intersectingfeature[2])) else: raise Exception("No intersecting features for OBJECTID %s" % OBJECTID) except Exception as inst: logging.error(str(os.getpid()) + " " + inst.args[0]) finally: conn.cur.close() del(conn)
def getTaskProcess(self):
    while True:
        if self.taskleft() > 0:
            try:
                req = self.q_request.get(block=True, timeout=5)
            except:
                continue
        else:
            threadname = multiprocessing.current_process().name
            print threadname + ' shutting down'
            break
        with self.lock:  # keep this update atomic; entering the critical section
            self.running = self.running + 1
            # self.lock.acquire()
        threadname = multiprocessing.current_process().name
        print 'Process ' + threadname + ' issuing request: '
        ans = self.do_job(self.job, req, threadname)
        # ans = self.connectpool.getConnect(req)
        # self.lock.release()
        self.q_finish.put((req, ans))
        # self.lock.acquire()
        with self.lock:
            self.running = self.running - 1
        threadname = multiprocessing.current_process().name
        print 'Process ' + threadname + ' finished request'
def doStuff(self):
    """ This is the method that does the work """
    while (not self.stop_event.is_set()) and (not self.waiting_queue.empty()):
        # Get a job from the queue
        try:
            self.waiting_lock.acquire()
            job = self.waiting_queue.get()
        except queue.Empty:
            break
        finally:
            self.waiting_lock.release()

        # Do the work
        print("{0}: Starting {1}".format(multiprocessing.current_process(), job))
        time.sleep(1)
        print("{0}: Finished {1}".format(multiprocessing.current_process(), job))
        time.sleep(1)

        # Put the result back on the result queue. (Doesn't have to be the same object as the source queue)
        try:
            self.complete_lock.acquire()
            self.complete_queue.put(job)
        except queue.Empty:
            break
        finally:
            self.complete_lock.release()
def multi_procPatFiles0514(monthFiles): """ Processes the patent files for 2005 to 2014. Takes monthly patent .xml files as input and outputs .json files with python dictionaries containing the patent number, patent title, and patent abstract. Used to spawn multiprocess workers to work on a subset of the total monthly files. """ t0 = time.clock() print "Starting", multiprocessing.current_process().name, "minutes:{0}".format( round(float(time.clock() - t0) / 60.0, 2) ), "\n" # Extract patent number, title, and abstract for all patents in each file and export to json progSplit = splitter(monthFiles, 20) # for progress reporting for fileName in monthFiles: # Report progress if fileName in [i[-1] for i in progSplit]: prct = round(monthFiles.index(fileName) / float(len(monthFiles)), 3) prct = str(prct * 100)[: 4 + 2] print "{0} is {1}% complete".format(multiprocessing.current_process().name, prct) fileDate = fileName[4:12] f = open(dataPath + "/{0}".format(fileName), "r") content = f.readlines() absDict = extPatAbs0514(content) f.close() jf = open(mthAbsPath + "/patAbs{0}.json".format(fileDate), "w") json.dump(absDict, jf) jf.close() print "\n", "Exiting", multiprocessing.current_process().name, "minutes:{0}".format( round(float(time.clock() - t0) / 60.0, 2) ), "\n"
def multi_collapseYears(years): """ Processes the output from processing the monthly patent files. Takes the monthly .json files as input and outputs yearly .json dictionary files. Used to spawn multiprocess workers to work on a subset of the years to collapse. """ t0 = time.clock() print "Starting", multiprocessing.current_process().name, time.clock() - t0, "\n" monthFiles = sorted((fn for fn in os.listdir(mthAbsPath) if fn.startswith("patAbs") and len(fn) > 15)) # Combine monthly files into year files progSplit = splitter(years, 3) # for progress reporting for yr in years: # Report progress if yr in [i[-1] for i in progSplit]: prct = round(years.index(yr) / float(len(years)), 3) prct = str(prct * 100)[: 4 + 2] print "{0} is {1}% complete".format(multiprocessing.current_process().name, prct) filesToCollapse = [x for x in monthFiles if x[6:10] == yr] baseFile = open(mthAbsPath + "/{0}".format(filesToCollapse[0]), "r") baseDict = json.load(baseFile) for fileName in filesToCollapse: fileToAdd = open(mthAbsPath + "/{0}".format(fileName), "r") dictToAdd = json.load(fileToAdd) baseDict.update(dictToAdd) fileToAdd.close() baseFile.close() jf = open(absPath + "/patAbs{0}.json".format(yr), "w") json.dump(baseDict, jf) jf.close() print "\n", "Exiting", multiprocessing.current_process().name, "minutes:{0}".format( round(float(time.clock() - t0) / 60.0, 2) ), "\n"
def print_(object):
    import threading
    import sys

    # START OF CRITICAL SECTION
    __builtin__.__GIL__.acquire()
    try:
        import multiprocessing
        if multiprocessing.current_process().name == 'MainProcess':
            sys.stdout.write("<%s:%s> : %s\n" % (multiprocessing.current_process().name,
                                                 threading.current_thread().name, object))
        else:
            sys.stdout.write("<PID #%d> : %s\n" % (multiprocessing.current_process().pid, object))
    except ImportError:
        sys.stdout.write("<%s> : %s\n" % (threading.current_thread().name, object))
    sys.stdout.flush()
    __builtin__.__GIL__.release()
    # END OF CRITICAL SECTION
    return None
def multi_procPatFiles7601(yearFiles):
    """
    Processes the patent files for 1976 to 2001. Takes yearly patent .dat files as input
    and outputs .json files with python dictionaries containing the patent number, patent
    title, and patent abstract. Used to spawn multiprocess workers to work on a subset of
    the total yearly files.
    """
    t0 = time.clock()
    print "Starting", multiprocessing.current_process().name, "minutes:{0}".format(
        round(float(time.clock() - t0) / 60.0, 2)
    ), "\n"

    # Extract patent number, title, and abstract for all patents in each file and export to json
    progSplit = splitter(yearFiles, 4)  # for progress reporting
    for fileName in yearFiles:
        # Report progress
        if fileName in [i[-1] for i in progSplit]:
            prct = round(yearFiles.index(fileName) / float(len(yearFiles)), 2)
            prct = str(prct * 100)[: 4 + 2]
            print "{0} is {1}% complete".format(multiprocessing.current_process().name, prct)

        fileYear = fileName[0:4]
        # open with the ISO-8859-1 codec; reopening the file in plain mode would discard the encoding
        f = codecs.open(dataPath + "/{0}".format(fileName), encoding="ISO-8859-1")
        content = f.readlines()
        absDict = extPatAbs7601(content)
        f.close()

        jf = open(absPath + "/patAbs{0}.json".format(fileYear), "w")
        json.dump(absDict, jf)
        jf.close()

    print "\n", "Exiting", multiprocessing.current_process().name, "minutes:{0}".format(
        round(float(time.clock() - t0) / 60.0, 2)
    ), "\n"
def _threaded_resolve_AS(): """Get an ASN from the queue, resolve it, return its routes to the *main* process and repeat until signaled to stop. This function is going to be spawned as a thread. """ while True: current_AS = q.get() if current_AS == 'KILL': q.task_done() break try: resp = comm.get_routes_by_autnum(current_AS, ipv6_enabled=True) if resp is None: raise LookupError routes = parsers.parse_AS_routes(resp) except LookupError: logging.warning("{}: {}: No Object found for {}" .format(mp.current_process().name, threading.current_thread().name, current_AS)) routes = None except Exception as e: logging.error("{}: {}: Failed to resolve DB object {}. {}" .format(mp.current_process().name, threading.current_thread().name, current_AS, e)) routes = None result_q.put((current_AS, routes)) q.task_done()
def pusher(task_queue, event, list_key=Conf.Q_LIST, r=redis_client):
    """
    Pulls tasks off the Redis list and puts them in the task queue
    :type task_queue: multiprocessing.Queue
    :type event: multiprocessing.Event
    :type list_key: str
    """
    logger.info(_("{} pushing tasks at {}").format(current_process().name, current_process().pid))
    while True:
        try:
            task = r.blpop(list_key, 1)
        except Exception as e:
            logger.error(e)
            # redis probably crashed. Let the sentinel handle it.
            sleep(10)
            break
        if task:
            # unpack the task
            try:
                task = signing.SignedPackage.loads(task[1])
            except (TypeError, signing.BadSignature) as e:
                logger.error(e)
                continue
            task_queue.put(task)
            logger.debug(_("queueing from {}").format(list_key))
        if event.is_set():
            break
    logger.info(_("{} stopped pushing tasks").format(current_process().name))
def reducer(q_manager, project_drs, options):
    _logger.info(multiprocessing.current_process().name + ' with pid ' + str(os.getpid()))
    reducer_queue_consume(q_manager, project_drs, options)
    _logger.info(multiprocessing.current_process().name + ' with pid ' + str(os.getpid()) + ' finished cleanly.')
    return
def getTaskProcess(self):
    while True:
        array = []
        if self.taskleft() > 0:
            for i in range(10):
                try:
                    req = self.q_request.get(block=True, timeout=1000)
                    array.append(req)
                except:
                    continue
                    # break
        # req = self.q_request.get()
        with self.lock:  # keep this update atomic; entering the critical section
            self.running = self.running + 1
        threadname = multiprocessing.current_process().name
        print 'Process ' + threadname + ' issuing request: '
        ans = self.do_job(self.job, req, threadname)
        # ans = self.connectpool.getConnect(req)
        # self.lock.release()
        if self.needfinishqueue > 0:
            self.q_finish.put((req, ans))
        # self.lock.acquire()
        with self.lock:
            self.running = self.running - 1
        threadname = multiprocessing.current_process().name
        print 'Process ' + threadname + ' finished request'
def worker(work_queue, done_queue):
    try:
        for url in iter(work_queue.get, 'STOP'):
            status_code = print_site_status(url)
            done_queue.put("%s | %s | %s" % (current_process().name, url, status_code))
    except Exception, e:
        done_queue.put("%s | %s | %s | %s" % (current_process().name, url, 5000, e.message))
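# A minimal driver sketch (assumed, not part of the original file) showing the usual
# wiring for this worker: URLs plus one 'STOP' sentinel per process go on the work
# queue, and results are drained after the workers join. print_site_status comes from
# the snippet's own module; a trivial stand-in is defined here so the sketch runs.
import multiprocessing
from multiprocessing import current_process

def print_site_status(url):
    return 200  # stand-in for the real status check

if __name__ == '__main__':
    urls = ['http://example.com', 'http://example.org']
    work_queue = multiprocessing.Queue()
    done_queue = multiprocessing.Queue()
    for url in urls:
        work_queue.put(url)
    procs = [multiprocessing.Process(target=worker, args=(work_queue, done_queue)) for _ in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        work_queue.put('STOP')  # one sentinel per worker
    for p in procs:
        p.join()
    while not done_queue.empty():
        print(done_queue.get())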
def _remove_adapters(self, adapter, info_file, sum_file, tmp_decontam_fastq, tmp_rmadapter_fastq):
    sys.stdout.write("[Preqc] %s removing adapters from %s" % (multiprocessing.current_process().name,
                                                               tmp_decontam_fastq))
    cutadapt_cmd = " cutadapt -b %s -O %d -m %d --info-file %s -o %s %s " % (adapter, self.min_overlap,
                                                                             self.min_readlen, info_file,
                                                                             tmp_rmadapter_fastq,
                                                                             tmp_decontam_fastq)
    sys.stdout.write(multiprocessing.current_process().name + "\t" + cutadapt_cmd + "\n")
    p = subprocess.Popen(shlex.split(cutadapt_cmd), stdout=open(sum_file, 'w'))
    p.wait()
    return p.returncode
def run(queue, params):
    """ Start a publisher thread to publish forever """
    try:
        (dbFile, dbBackupDir, times, submitType) = getStartParams(params)
        log.info('{0} started'.format(current_process().name))

        commandQueue = Queue.Queue()
        interruptRequest = threading.Event()
        publishThread = threading.Thread(target=publishForever,
                                         args=(commandQueue, interruptRequest, times, submitType, dbFile, dbBackupDir),
                                         name="PublishThread")
        publishThread.start()

        dispatch = {'stop': stopCommandHandler,
                    'set': setCommandHandler}

        while not stopRequest.isSet():
            (command, params) = queue.get()
            log.info('{0} received command: [{1}]'.format(current_process().name, str(command)))
            try:
                dispatch[command](interruptRequest, commandQueue, params)
                queue.put(Response())
            except Exception as ex:
                queue.put(Response(message=str(ex), status=ResponseStatus.ERROR))

        log.info('{0} Waiting for {1} to stop'.format(current_process().name, publishThread.name))
        publishThread.join()
        log.info('{0} ... OK'.format(current_process().name))
    except Exception as ex:
        log.exception(repr(ex))
    log.info('{0} terminated'.format(__name__))
def Map(L):
    sentence_max = 0
    #temp_file = get_tempfile(L)
    text = prepjob(L)
    #data_file = load(temp_file)
    local_words = {}
    print multiprocessing.current_process().name, 'to map region:', L[1], "to", L[2]
    while True:
        raw_line = text.readline()
        if not raw_line:
            break
        for sentence in splitSentence(raw_line):
            for (i, word) in enumerate(sentence.split()):
                if i > sentence_max:
                    sentence_max = i
                if not word.isspace():
                    sanitized = sanitize(word).lower()
                    local_words[sanitized] = incrementTuple(i, local_words.get(sanitized, (0, {})))
    out = []
    sum = 0
    for (key, value) in local_words.items():
        if key != '' and value is not None:
            sum += value[0]
            out.append((key, value))
    print multiprocessing.current_process().name, 'mapped tokens:', sum, 'sentence max:', sentence_max
    #data_file.close()
    #os.remove(temp_file)
    return out
def proc1(pipe):
    print(multiproc.current_process().pid)
    print(os.getpid())  # same as the line above: another way to get the current process's pid
    pipe.send([('Hello , my name is process %s !' % multiproc.current_process().pid),
               multiproc.current_process().pid])
    pipe.send([('This is process %s second send !' % multiproc.current_process().pid),
               multiproc.current_process().pid])
    print('waiting....')
    print(pipe.recv())
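# A minimal driver sketch (an assumption, not from the original module): it hands one
# end of a Pipe to proc1 and sends a reply from the parent so the child's pipe.recv()
# returns. `multiproc` is assumed to be multiprocessing imported under that alias.
import os
import multiprocessing as multiproc

if __name__ == '__main__':
    parent_conn, child_conn = multiproc.Pipe()
    p = multiproc.Process(target=proc1, args=(child_conn,))
    p.start()
    print(parent_conn.recv())            # first message from the child
    print(parent_conn.recv())            # second message from the child
    parent_conn.send('ack from parent')  # unblocks the child's pipe.recv()
    p.join()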
def currentWorkerName(): return current_process().name
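# A small usage sketch (assumed, not part of the original): the helper returns
# 'MainProcess' in the parent and the worker's own name inside a child process.
import multiprocessing
from multiprocessing import current_process

def _show_name():
    print(currentWorkerName())

if __name__ == '__main__':
    print(currentWorkerName())  # 'MainProcess'
    p = multiprocessing.Process(target=_show_name, name='child-1')
    p.start()
    p.join()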
def _sub_st_loc_map(args): testlist, iou_th, i_th, nclasses, gt, on_keyframes, eval_recall = args testtracks = global_tracks[i_th] _MAXDET = int(1e3 * len(testlist)) log_str = '' proc = multiprocessing.current_process().name log_str += '\n\nST-loc @IoU=%.02f' % (iou_th) if eval_recall: final_str = '\nMEAN:\n ST-RECALL = %.3f / ANNOT-RECALL = %.3f' else: final_str = '\nmAP = %.3f / rec = %.3f' aps = np.zeros(nclasses) recs = np.zeros(nclasses) for c in range(nclasses): nodetstr = '' num_positives = 0 num_annotations = 0 ctracks = testtracks[c] tp = np.zeros(_MAXDET, dtype=bool) fp = np.zeros(_MAXDET, dtype=bool) scores = np.zeros(_MAXDET, dtype=float) i_det = 0 # next idx to fill for v in testlist: gts = gt[v] assert len(gts['gts'] ) > 0, 'This was supposed to be handled in loadtracks' gttracks = get_class_gt(gts, c) ngts = get_num_positives(gttracks) num_positives += ngts if not v in ctracks: if ngts > 0: nodetstr += '%s (%d GTs) ' % (v, ngts) continue tracks = testtracks[c][v] ntracks = len(tracks) prev_i_det = i_det if eval_recall: recalled_gt, recalled_annot, n_annot = append_tpfp( tp, fp, scores, tracks, gttracks, i_det, iou_th, on_keyframes, overlap_only=True, eval_recall=eval_recall) num_annotations += n_annot aps[c] += recalled_gt recs[c] += recalled_annot continue i_det = append_tpfp(tp, fp, scores, tracks, gttracks, i_det, iou_th, on_keyframes) assert num_positives > 0, 'No GT for class %d' % (c + 1) if eval_recall: aps[c] = aps[c] / num_positives # ST recall recs[c] = recs[c] / num_annotations # annot recall log_str += '\nClass %d ST-recall = %.3f - Annot-recall = %.3f' % ( c + 1, aps[c], recs[c]) continue c_ap, rec, issame = get_ap(tp, fp, scores, i_det, num_positives) aps[c] = c_ap recs[c] = rec # disp info if nodetstr != '': log_str += '\nNo detection for %s' % (nodetstr) if issame: log_str += '\nWARNING SOME SCORES ARE EXACTLY EQUAL, ORDER MIGHT MATTER!' # compute nb of FP in the first 50% of TP s_tp = tp[:i_det] tp_idx = np.linspace(0, i_det, i_det, dtype=int) tp_idx = tp_idx[s_tp] tpnum = len(tp_idx) if tpnum >= 50: fp_at_50 = fp[:tp_idx[49]].sum() # FP in first TOP50 else: fp_at_50 = fp.sum() log_str += '\nClass %d AP = %.3f (max recall = %.3f) - P/TP/FP/FP@50 %d/%d/%d/%d' % ( c + 1, c_ap, rec, num_positives, tpnum, fp.sum(), fp_at_50) log_str += final_str % (aps.mean(), recs.mean()) return iou_th, i_th, aps, log_str
def _loadvideotracks(args): v, trackpath, nclasses, regress, linear_regressor, track_class_agnostic, scale, normalization_fn = args if not linear_regressor is None: assert not regress proc = multiprocessing.current_process().name if proc == "PoolWorker-1": print v, proc vidpath = '%s/%s' % (trackpath[0], v) numExp = len(trackpath) orignal_file_path = vidpath + '.pkl' if os.path.exists(orignal_file_path): # the original track path has been detected (tracks are not scored) useful for recall computation from_original_file = True with open(orignal_file_path) as f: original_file = pickle.load(f) ntracks = len(original_file['tracks']) assert numExp == 1 else: from_original_file = False if os.path.exists(vidpath): tracklist = os.listdir(vidpath) ntracks = len(tracklist) else: ntracks = 0 if ntracks == 0: print 'Warning: no intput track found in %s' % vidpath loadedtracks = [{v: []} for i in range(nclasses)] for t in range(ntracks): for exp in range(numExp): if from_original_file: tfile = original_file['tracks'][t] tfile['scores'] = np.zeros([tfile['N_frames'], nclasses + 1]) # make dummy scores else: vidpath = '%s/%s' % (trackpath[exp], v) tpath = '%s/track%05d.pkl' % (vidpath, t + 1) with open(tpath) as f: tfile = pickle.load(f) nframes = tfile['tbound'][1] - tfile['tbound'][0] + 1 assert nframes == tfile['boxes'].shape[0] and nframes == tfile[ 'N_frames'] if 'box_label' in tfile: tlabel = tfile['box_label'] else: assert track_class_agnostic, 'if there is no label for the track we must be in class agnostic mode' tlabel = -1 if not track_class_agnostic: assert tlabel > 0, 'if we do not consider all tracks (not class agnostic) then tracks have to be labeled' allscore = tfile['scores'] if not (type(tfile['scores']) == np.ndarray): allscore = allscore.numpy() if normalization_fn is not None: if normalization_fn == 'softmax': allscore = softmax(allscore) else: raise ValueError('unknown normalization function') for c in range(nclasses): if (not track_class_agnostic and tlabel != c + 1): continue cscore = allscore[:, c] / numExp if cscore.shape[0] != nframes: # there is one score per feature, we have to duplicate f2c = tfile['frame2chunk'] nChunks = cscore.shape[0] assert nChunks == (f2c[-1] - f2c[0] + 1), 'there is not one pred per chunk!' c0 = f2c[0] _dupl = [(i_c + c0 == f2c).sum() for i_c in range(nChunks)] cscore = np.repeat(cscore, _dupl) assert cscore.shape[0] == nframes if exp == 0: track = {} fields = ['N_frames', 'tbound'] if regress: track['boxes'] = tfile['reg_boxes'][c] elif not linear_regressor is None: track['boxes'] = utils.apply_lin_regressor( tfile['boxes'], linear_regressor[c], tfile['WH_size']) else: fields.append('boxes') if 'track_score' in tfile: track['det_track_score'] = tfile['track_score'] for ff in fields: track[ff] = tfile[ff] track['videoname'] = v track['scores'] = cscore loadedtracks[c][v].append(track) cmpboxes = tfile['boxes'] if scale > 0: # scale boxes if one scale has been passed boxes = np.array(track['boxes']) # clone it utils.scaleboxes_(boxes, scale) if 'WH_size' in tfile: WHmax = tfile['WH_size'] else: WHmax = (320, 240) boxes = utils.clipboxes(boxes, WHmax) track['boxes'] = boxes else: track = loadedtracks[c][v][-1] track['scores'] += cscore assert (cmpboxes - tfile['boxes']).sum( ) == 0, 'Tracks to combine do not have same boxes' return loadedtracks
def foo():
    name = multiprocessing.current_process().name
    print('Starting %s\n' % name)
    time.sleep(3)
    print('Exiting %s\n' % name)
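# A minimal launcher sketch (assumed, not part of the original snippet) showing how foo
# picks up the name given to each Process object.
import multiprocessing
import time

if __name__ == '__main__':
    workers = [multiprocessing.Process(target=foo, name='worker-%d' % i) for i in range(3)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()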
def sing():
    sing_process_id = os.getpid()
    # get the current process object to see which code this process is running: multiprocessing.current_process()
    print('sing_process_id:', sing_process_id, multiprocessing.current_process())
    # get the parent process id of this child process
    sing_process_parents_id = os.getppid()
    print('sing_process_parents_id (parent process id):', sing_process_parents_id)
    for i in range(3):
        print('singing...')
        time.sleep(0.3)


# get the current process id
main_process_id = os.getpid()
# get the current process object to see which code this process is running: multiprocessing.current_process()
print('main_process_id:', main_process_id, multiprocessing.current_process())

# 2. Create the child processes (processes you create yourself are child processes)
# Arguments: group: process group, currently only None is allowed and it rarely needs setting;
#            target: the task the process should run;
#            name: process name, defaults to Process-1, Process-2, ... if not set
dance_process = multiprocessing.Process(target=dance)
print('dance_process:', dance_process)
sing_process = multiprocessing.Process(target=sing)
print('sing_process:', sing_process)

# 3. Start the processes to run their tasks
dance_process.start()
sing_process.start()
# Processes run in no fixed order; which one runs first is decided by the OS scheduler
def worker(s, i):
    s.acquire()
    print(multiprocessing.current_process().name + " acquire")
    time.sleep(i)
    print(multiprocessing.current_process().name + " release\n")
    s.release()
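# A minimal driver sketch (assumed setup, not from the original) showing the semaphore
# bounding how many workers hold the resource at once.
import multiprocessing
import time

if __name__ == '__main__':
    s = multiprocessing.Semaphore(2)  # at most two workers inside the critical section
    procs = [multiprocessing.Process(target=worker, args=(s, 2)) for _ in range(5)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()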
def dynamic_zpm_temp(arguments,ddw_save,ddw_save2,type,temp_info,smearing,eig0,degen,energy,gaussian_smearing): if type == 1 or type == 2: nbqpt,wtq,eigq_files,DDB_files,EIGR2D_files,GKK_files = arguments GKKterm = system(directory='.',filename=GKK_files) GKK = GKKterm.GKK_bis elif type == 3: nbqpt,wtq,eigq_files,DDB_files,EIGR2D_files = arguments DDB = system(directory='.',filename=DDB_files) EIGR2D = system(directory='.',filename=EIGR2D_files) ntemp = len(temp_info) if type == 1 or type == 2: total_corr = zeros((4+2*len(energy),ntemp,EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband),dtype=complex) elif type == 3: total_corr = zeros((4+len(energy),ntemp,EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband),dtype=complex) eigq = system(directory='.',filename=eigq_files) # If the calculation is on a Homogenous q-point mesh # retreve the weight of the q-point if (wtq == 0): wtq = EIGR2D.wtq wtq = wtq[0] # Current Q-point calculated print("Q-point: ",nbqpt," with wtq =",wtq," and reduced coord.",EIGR2D.iqpt) current = multiprocessing.current_process() file_name = str('PYLOG_')+str(current.pid) if os.path.isfile(file_name) : with open(file_name,'a') as F: F.write("Q-point: "+str(nbqpt)+" with wtq ="+str(wtq)+" and reduced coord."+str(EIGR2D.iqpt)+"\n") else: with open(file_name,'w') as F: F.write("Q-point: "+str(nbqpt)+" with wtq ="+str(wtq)+" and reduced coord."+str(EIGR2D.iqpt)+"\n") # Find phonon freq and eigendisplacement from _DDB omega,eigvect,gprimd=compute_dynmat(DDB) # Compute the displacement = eigenvectors of the DDB. # Due to metric problem in reduce coordinate we have to work in cartesian # but then go back to reduce because our EIGR2D matrix elements are in reduced coord. fan_corr = zeros((ntemp,EIGR2D.nkpt,EIGR2D.nband,EIGR2D.nsppol),dtype=complex) ddw_corr = zeros((ntemp,EIGR2D.nkpt,EIGR2D.nband,EIGR2D.nsppol),dtype=complex) fan_add = N.array(zeros((ntemp,EIGR2D.nkpt,EIGR2D.nband,EIGR2D.nsppol),dtype=complex)) ddw_add = N.array(zeros((ntemp,EIGR2D.nkpt,EIGR2D.nband,EIGR2D.nsppol),dtype=complex)) bose = get_bose(EIGR2D.natom,omega,temp_info) # bose (imode, itemp) # Get reduced displacement (scaled with frequency) displ_red_FAN2,displ_red_DDW2 = get_reduced_displ(EIGR2D.natom,eigvect,omega,gprimd) # displ_red(mode,atom1,atom2,dir1,dir2) # Einstein sum make the vector matrix multiplication ont the correct indices fan_corrQ = N.einsum('iojklmn,plnkm->pijo',EIGR2D.EIG2D_bis,displ_red_FAN2) ddw_corrQ = N.einsum('iojklmn,plnkm->pijo',ddw_save,displ_red_DDW2) # fan_corrQ(mode,kpt,band,spin) # Sum over the modes with bose + reshape to (itemp,ispin,ikpt,iband) fan_corr = N.einsum('ijkl,im->mljk',fan_corrQ,2*bose+1.0) ddw_corr = N.einsum('ijkl,im->mljk',ddw_corrQ,2*bose+1.0) omegatmp = omega[:].real # imode if type == 3: imag_fan_add = 0.0 elif type == 1 or type == 2: print("Now compute active space ...") # Now compute active space # sum over atom1,dir1 temp = N.einsum('iqjklm,nlokp->ijmnpoq',GKK,displ_red_FAN2) fan_addQ = N.einsum('ijklmnq,iqjmnk->ijklq',temp,N.conjugate(GKK)) # fan_addQ(nkpt,nband,nband,imode,ispin) temp = N.einsum('iqjklm,nlokp->ijmnpoq',ddw_save2,displ_red_DDW2) ddw_addQ = N.einsum('ijklmnq,iqjmnk->ijklq',temp,N.conjugate(ddw_save2)) # fan_addQ(nkpt,nband,nband,imode,ispin) occtmp = EIGR2D.nsppol*EIGR2D.occ[:,:,:]/2 # jband # should be 1 ! 
delta_E_ddw = N.einsum('lij,k->lijk',eig0[:,:,:].real,N.ones(EIGR2D.nband)) - \ N.einsum('lij,k->likj',eig0[:,:,:].real,N.ones(EIGR2D.nband)) - \ N.einsum('i,ljk->ljik',N.ones((EIGR2D.nband)),(2*occtmp-1))*smearing*1j # spin,ikpt,iband,jband ddw_tmp = N.einsum('ijkln,lm->mijkn',ddw_addQ,2*bose+1.0) # itemp,ikpt,iband,jband,ispin ddw_add = N.einsum('ijklm,mjkl->imjk',ddw_tmp,1.0/delta_E_ddw) # temp,spin,ikpt,iband delta_E = N.einsum('lij,k->lijk',eig0[:,:,:].real,N.ones(EIGR2D.nband)) - \ N.einsum('lij,k->likj',eigq.EIG[:,:,:].real,N.ones(EIGR2D.nband)) # spin,ikpt,iband,jband delta_E_sm = N.einsum('i,ljk->ljik',N.ones((EIGR2D.nband)),(2*occtmp-1))*smearing*1j # spin,ikpt,iband,jband num1 = N.einsum('ij,mkl->mkijl',bose,N.ones((EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband))) +1.0 \ - N.einsum('ij,mkl->mkijl',N.ones((3*EIGR2D.natom,ntemp)),occtmp) # spin,k,mod,temp,band # bef was (imode,tmp,band) deno1 = N.einsum('mijk,l->mijkl',delta_E,N.ones(3*EIGR2D.natom),dtype=N.complex) if type==1: # dynamic deno1 -= N.einsum('mijk,l->mijkl',N.ones((EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband,EIGR2D.nband)),omegatmp) #spin,ikpt,iband,jband,imode imag_part1 = N.pi*gaussian(deno1,0.0,gaussian_smearing) #imag_part1 = N.pi*fermidirac(deno1,0.0,gaussian_smearing) #imag_part1 = N.pi*lorentzian(deno1,0.0,gaussian_smearing) #imag_part1 = gaussian_smearing/(deno1*deno1 + gaussian_smearing*gaussian_smearing) deno1 += N.einsum('mijk,l->mijkl',delta_E_sm,N.ones(3*EIGR2D.natom),dtype=N.complex) div1 = N.einsum('ijklm,ijnmk->iklmjn',num1,1.0/deno1) # (spin,k,mod,temp,jband)/(spin,ikpt,iband,jband,mode) => (ispin,imod,tmp,jband,ikpt,iband) num2 = N.einsum('ij,mkl->mkijl',bose,N.ones((EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband))) \ + N.einsum('ij,mkl->mkijl',N.ones((3*EIGR2D.natom,ntemp)),occtmp) #imode,tmp,jband deno2 = N.einsum('mijk,l->mijkl',delta_E,N.ones((3*EIGR2D.natom),dtype=N.complex)) if type==1: # dynamic deno2 += N.einsum('mijk,l->mijkl',N.ones((EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband,EIGR2D.nband)),omegatmp) #spin,ikpt,iband,jband,imode imag_part2 = N.pi*gaussian(deno2,0.0,gaussian_smearing) #imag_part2 = N.pi*fermidirac(deno2,0.0,gaussian_smearing) #imag_part2 = N.pi*lorentzian(deno2,0.0,gaussian_smearing) #imag_part2 = gaussian_smearing/(deno2*deno2 + gaussian_smearing*gaussian_smearing) deno2 -= N.einsum('mijk,l->mijkl',delta_E_sm,N.ones(3*EIGR2D.natom),dtype=N.complex) div2 = N.einsum('ijklm,ijnmk->iklmjn',num2,1.0/deno2) # (spin,k,mod,temp,jband)/(spin,ikpt,iband,jband,mode) => (ispin,imod,tmp,jband,ikpt,iband) fan_add = N.einsum('ijklq,qlmkij->mqij',fan_addQ,div1+div2) # (k,iband,jband,imod,ispin) * (spin,imod,tmp,jband,ikpt,iband) => (temp,ispin,ikpt,iband) imag_div1 = N.einsum('ijklm,ijnmk->iklmjn',num1,imag_part1) # (spin,k,mod,temp,jband)/(spin,ikpt,iband,jband,mode) => (ispin,imod,tmp,jband,ikpt,iband) imag_div2 = N.einsum('ijklm,ijnmk->iklmjn',num2,imag_part2) # (spin,k,mod,temp,jband)/(spin,ikpt,iband,jband,mode) => (ispin,imod,tmp,jband,ikpt,iband) imag_fan_add = N.einsum('ijklq,qlmkij->mqij',fan_addQ,imag_div1+imag_div2) # (k,iband,jband,imod,ispin) * (spin,imod,tmp,jband,ikpt,iband) => (temp,ispin,ikpt,iband) print("Now compute generalized g2F Eliashberg electron-phonon spectral function ...") fan_tmp = N.einsum('ijklm->mijl',fan_addQ) # (ispin,ikpt, iband, imode) ddw_tmp = N.einsum('ijklm->mijl',ddw_addQ) # (ispin,ikpt, iband, imode) g_kk = fan_tmp - ddw_tmp # Eliashberg function a2F = zeros((len(energy),EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband),dtype=complex) for ifreq,freq in enumerate(energy): for imode 
in N.arange(3*EIGR2D.natom): a2F[ifreq,:,:,:] += g_kk[:,:,:,imode]*gaussian(freq,omegatmp[imode],gaussian_smearing) fan_corr += fan_add ddw_corr += ddw_add # PDOS PDOS = zeros((len(energy)),dtype=complex) ii = 0 for ifreq,freq in enumerate(energy): for imode in N.arange(3*EIGR2D.natom): PDOS[ifreq] += gaussian(freq,omegatmp[imode],gaussian_smearing) # From the equations ddw_corr has no physical imaginary part ddw_corr.imag = 0.0 eigen_corr = (fan_corr[:,:,:,:] - ddw_corr[:,:,:,:])*wtq total_corr[0,:,:,:,:] = eigen_corr[:,:,:,:] total_corr[1,:,:,:,:] = fan_corr[:,:,:,:]*wtq total_corr[2,:,:,:,:] = ddw_corr[:,:,:,:]*wtq total_corr[3,:,:,:,:] = imag_fan_add*wtq total_corr[4:len(energy)+4,0,0,0,0] = PDOS if type == 1 or type == 2: total_corr[4+len(energy):len(energy)*2+4,0,:,:,:] = a2F total_corr = make_average(EIGR2D.nsppol,EIGR2D.nkpt,EIGR2D.nband,degen,total_corr,temp=True) return total_corr
def process_age_records(age_records, sv_type="INS", ins_min_unaligned=10, min_interval_len=200, pad=500, min_deletion_len=30, min_del_subalign_len=MIN_DEL_SUBALIGN_LENGTH, min_inv_subalign_len=MIN_INV_SUBALIGN_LENGTH, dist_to_expected_bp=400, age_window=AGE_WINDOW_SIZE, pad_ins=0, sc_locations=[]): func_logger = logging.getLogger( "%s-%s" % (process_age_records.__name__, multiprocessing.current_process())) good_age_records = age_records if sv_type == "INS": good_age_records = [ age_record for age_record in good_age_records if not age_record.almost_all_bases_aligned(ins_min_unaligned) ] good_age_records = [ age_record for age_record in good_age_records if not age_record.is_reference() ] elif sv_type == "DEL": good_age_records = [ age_record for age_record in good_age_records if len(age_record.start1_end1s) == 2 and min(age_record.ref_flanking_regions) >= min_del_subalign_len ] good_age_records = [ age_record for age_record in good_age_records if abs(age_record.start1_end1s[0][1] - age_record.start1_end1s[1][0]) >= min_deletion_len ] good_age_records = [ age_record for age_record in good_age_records if float(age_record.score) / sum(age_record.ref_flanking_regions) >= 0.7 ] good_age_records = [ age_record for age_record in good_age_records if abs(age_record.start2_end2s[0][1] - age_record.start2_end2s[1][0]) <= 50 ] good_age_records = [ age_record for age_record in good_age_records if check_closeness_to_bp( min(age_record.start1_end1s[0][1], age_record.start1_end1s[1] [0]), pad, dist_to_expected_bp, "L") and check_closeness_to_bp( max(age_record.start1_end1s[0][1], age_record.start1_end1s[1] [0]), pad, dist_to_expected_bp, "R", age_record.inputs[0].length) ] elif sv_type == "INV": good_age_records = [ age_record for age_record in good_age_records if len(age_record.start1_end1s) >= 2 and min(map(lambda x: abs(x[1] - x[0]), age_record.start1_end1s)) >= min_inv_subalign_len ] elif sv_type == "DUP": good_age_records = [ age_record for age_record in good_age_records if len(age_record.start1_end1s) == 2 and min(age_record.ref_flanking_regions) >= 100 ] else: pass # Add some features to an info dict info = defaultdict(int) info["BA_NUM_GOOD_REC"] = len(good_age_records) if not good_age_records: func_logger.warning("No good records found for getting breakpoints") return [], dict(info) for rec in good_age_records: info["BA_FLANK_PERCENT"] = int( max(info["BA_FLANK_PERCENT"], rec.flank_percent)) info["BA_NFRAGS"] = int(max(info["BA_NFRAGS"], rec.nfrags)) info["BA_NUM_ALT"] = int(max(info["BA_NUM_ALT"], rec.n_alt)) info["BA_PERCENT_MATCH"] = int( max(info["BA_PERCENT_MATCH"], rec.percent)) func_logger.info("Found %d good records for getting breakpoints" % (len(good_age_records))) func_logger.info("Good records") for age_record in good_age_records: func_logger.info(str(age_record)) sv_region = good_age_records[0].contig.sv_region if sv_type == "DEL": breakpoints = get_deletion_breakpoints(good_age_records, start=sv_region.pos1 - pad) elif sv_type == "INS": reference_intervals = get_reference_intervals( good_age_records, start=1, min_interval_len=min_interval_len) func_logger.info("Gathered reference intervals as %s" % (str(reference_intervals))) breakpoints = get_insertion_breakpoints( good_age_records, reference_intervals, expected_bp_pos=[ pad + pad_ins, max((sv_region.pos2 - sv_region.pos1) - pad_ins + pad, 0) ], window=age_window, start=sv_region.pos1 - pad) elif sv_type == "INV": breakpoints = get_inversion_breakpoints( good_age_records, start=sv_region.pos1 - pad, pad=pad, 
min_inv_subalign_len=min_inv_subalign_len, dist_to_expected_bp=dist_to_expected_bp) elif sv_type == "DUP": breakpoints = get_duplication_breakpoints( good_age_records, start=sv_region.pos1 - pad, pad=pad, dist_to_expected_bp=dist_to_expected_bp) else: return [], dict(info) func_logger.info("Detected breakpoints as %s" % (str(breakpoints))) # Add a few more features related to the breakpoints computed info["BA_NUM_BP"] = len(breakpoints) if sv_type == "DEL": if len(breakpoints) == 2: func_logger.info("True deletion interval %s" % (str(breakpoints))) else: func_logger.info("False deletion interval %s" % (str(breakpoints))) return [], dict(info) elif sv_type == "INS": if len(breakpoints) == 1: # if sv_region.pos2 - sv_region.pos1 <= 20: # info["BA_BP_SCORE"] = abs(breakpoints[0][0] - sv_region.pos1) # if abs(breakpoints[0][0] - sv_region.pos1) > 20: # return [], dict(info) # else: if not sc_locations: sc_locations = [ sv_region.pos1 + pad_ins, sv_region.pos2 - pad_ins ] min_diff = min( map(lambda x: abs(x - breakpoints[0][0]), sc_locations)) info["BA_BP_SCORE"] = min_diff if min_diff > 100: func_logger.info( "False insertion since resolved breakpoint not close to a soft-clip location" ) return [], dict(info) func_logger.info("True insertion interval %s" % (str(breakpoints))) else: return [], dict(info) elif sv_type == "INV": if len(breakpoints) == 2: func_logger.info("True inversion interval %s" % (str(breakpoints))) else: func_logger.info("False inversion interval %s" % (str(breakpoints))) return [], dict(info) elif sv_type == "DUP": if len(breakpoints) == 2: func_logger.info("True duplication interval %s" % (str(breakpoints))) else: func_logger.info("False duplication interval %s" % (str(breakpoints))) return [], dict(info) return breakpoints, dict(info)
def get_inversion_breakpoints(age_records, window=20, min_endpoint_dist=10, start=0, pad=500, dist_to_expected_bp=400, min_inv_subalign_len=MIN_INV_SUBALIGN_LENGTH): func_logger = logging.getLogger( "%s-%s" % (get_deletion_breakpoints.__name__, multiprocessing.current_process())) potential_breakpoints = [] for age_record in age_records: polarities = [ abs(age_record.polarities1[i] - age_record.polarities2[i]) for i in range(age_record.nfrags) ] good_intervals = [ i for i in range(age_record.nfrags) if abs(age_record.start1_end1s[i][1] - age_record.start1_end1s[i][0]) > min_inv_subalign_len and abs(age_record.start2_end2s[i][1] - age_record.start2_end2s[i][0]) > min_inv_subalign_len ] good_intervals = [ i for i in good_intervals if abs(age_record.start1_end1s[i][1] - age_record.start1_end1s[i][0]) <= max( age_record.inputs[0].length - 2 * (pad - dist_to_expected_bp), pad + dist_to_expected_bp) ] func_logger.info('Good intervals: %s' % str(good_intervals)) if len(good_intervals) < 2: func_logger.info( 'Not enough good interval for this age record: %s' % str(age_record)) continue candidate_inv_intervals = [] inv_interval = -1 long_inversion = False left_end_near_l_bp = filter( lambda x: check_closeness_to_bp(min(age_record.start1_end1s[ x]), pad, dist_to_expected_bp, "L"), good_intervals) right_end_near_r_bp = filter( lambda x: check_closeness_to_bp(max(age_record.start1_end1s[ x]), pad, dist_to_expected_bp, "R", age_record.inputs[0].length ), good_intervals) right_end_near_l_bp = filter( lambda x: check_closeness_to_bp(max(age_record.start1_end1s[ x]), pad, dist_to_expected_bp, "L"), good_intervals) left_end_near_r_bp = filter( lambda x: check_closeness_to_bp(min(age_record.start1_end1s[ x]), pad, dist_to_expected_bp, "R", age_record.inputs[0].length ), good_intervals) candidate_inv_intervals = list( set(left_end_near_l_bp) & set(right_end_near_r_bp)) candidate_norm_intervals = list( set(left_end_near_r_bp) | set(right_end_near_l_bp)) if len(candidate_inv_intervals) > 1 and len( candidate_norm_intervals) <= 1: candidate_inv_intervals = list( set(candidate_inv_intervals) - set(candidate_norm_intervals)) if len(candidate_inv_intervals) > 1: dist_to_exp_bps = map( lambda x: abs(min(age_record.start1_end1s[x]) - pad) + abs( max(age_record.start1_end1s[x]) - (age_record.inputs[0].length - pad)), candidate_inv_intervals) inv_interval = min(enumerate(dist_to_exp_bps), key=lambda x: x[1])[0] elif len(candidate_inv_intervals) == 1: inv_interval = candidate_inv_intervals[0] if inv_interval == -1: #Potentially long inversion candidate_inv_intervals=[i for i in left_end_near_l_bp if ((set(candidate_norm_intervals)&set(left_end_near_r_bp))-set([i]))] + \ [i for i in right_end_near_r_bp if ((set(candidate_norm_intervals)&set(right_end_near_l_bp))-set([i]))] if len(candidate_inv_intervals) > 1: candidate_inv_intervals=[i for i in set(candidate_inv_intervals)&set(left_end_near_l_bp) if (pad< (sum(age_record.start1_end1s[i])/2.0))] + \ [i for i in set(candidate_inv_intervals)&set(right_end_near_r_bp) if ((age_record.inputs[0].length-pad) > (sum(age_record.start1_end1s[i])/2.0))] if candidate_inv_intervals: func_logger.info('Potentially long-inversion interval: %s' % candidate_inv_intervals) long_inversion = True if len(candidate_inv_intervals) > 1: dist_to_exp_bps = map( lambda x: abs(min(age_record.start1_end1s[x]) - pad) if i in left_end_near_l_bp else abs( max(age_record.start1_end1s[x]) - (age_record.inputs[0].length - pad)), candidate_inv_intervals) inv_interval = min(enumerate(dist_to_exp_bps), 
key=lambda x: x[1])[0] else: inv_interval = candidate_inv_intervals[0] elif age_record.inputs[0].length > ((2 * pad + min_inv_subalign_len)): long_inversion = True if inv_interval == -1: func_logger.info( 'Not candidate inversion interval found for this age record: %s' % str(age_record)) continue func_logger.info('age_record: %s' % str(age_record)) func_logger.info('inverted interval: %s' % str(inv_interval)) candidate_norm_intervals = filter( lambda x: polarities[x] != polarities[inv_interval], set(candidate_norm_intervals) - set([inv_interval])) if long_inversion and (inv_interval not in set(left_end_near_l_bp) & set(right_end_near_r_bp)): candidate_norm_intervals = list( set(candidate_norm_intervals) & set(left_end_near_r_bp if (inv_interval in left_end_near_l_bp ) else right_end_near_l_bp)) if not candidate_norm_intervals: func_logger.info( 'Cannot find the normal interval for this age record: %s' % str(age_record)) continue if len(candidate_norm_intervals) > 1: candidate_norm_intervals = map( lambda x: (x, abs(age_record.start1_end1s[x][0] - age_record. start1_end1s[x][1])), set(candidate_norm_intervals)) norm_interval, norm_length = max(candidate_norm_intervals, key=lambda x: x[2]) else: norm_interval = candidate_norm_intervals[0] func_logger.info('norm_interval: %s' % str(norm_interval)) s_inv = sorted(age_record.start1_end1s[inv_interval]) s_norm = sorted(age_record.start1_end1s[norm_interval]) if (s_norm[0] - s_inv[0]) * (s_norm[1] - s_inv[1]) <= 0: func_logger.info('Bad intervals (one fully covers the other): %s' % str(age_record)) continue if not long_inversion: interval = age_record.start2_end2s[inv_interval] if min([ interval[0], abs(interval[0] - age_record.inputs[1].length), interval[1], abs(interval[1] - age_record.inputs[1].length) ]) < min_endpoint_dist: func_logger.info( 'Inverted interval end points are too close to borders in Seq2: %s' % str(age_record)) continue if (((s_norm[1] > s_inv[1]) and ((s_inv[1] - s_norm[0]) > 10)) or ((s_norm[0] < s_inv[0]) and ((s_norm[1] - s_inv[0]) > 10))): func_logger.info('Bad middle bp in seq1 (covers>10): %s' % str(age_record)) continue if (((s_norm[1] > s_inv[1]) and ((s_norm[0] - s_inv[1]) > 50)) or ((s_norm[0] < s_inv[0]) and ((s_inv[0] - s_norm[1]) > 50))): func_logger.info('Bad middle bp in seq1 (apart>50): %s' % str(age_record)) continue bp_idx = 0 if (s_norm[1] > s_inv[1]) else 1 bp1 = s_inv[bp_idx] bp2 = s_norm[bp_idx] bp1_seq2 = age_record.start2_end2s[inv_interval][filter( lambda x: age_record.start1_end1s[inv_interval][x] == bp1, [0, 1])[0]] bp2_seq2 = age_record.start2_end2s[norm_interval][filter( lambda x: age_record.start1_end1s[norm_interval][x] == bp2, [0, 1])[0]] if abs(bp1_seq2 - bp2_seq2) > 10: func_logger.info('BPs do not match in seq2: %s' % str(age_record)) continue potential_breakpoints += [bp1, bp2] potential_breakpoints = sorted(potential_breakpoints) breakpoints = [] for breakpoint in potential_breakpoints: if min([window + 1] + [abs(b - breakpoint) for b in breakpoints]) >= window: breakpoints.append(breakpoint) func_logger.info("Gathered breakpoints as %s" % (str(breakpoints))) return [start + breakpoint for breakpoint in breakpoints]
def worker( task_queue: Queue, result_queue: Queue, timer: Value, timeout: int = Conf.TIMEOUT ): """ Takes a task from the task queue, tries to execute it and puts the result back in the result queue :param timeout: number of seconds wait for a worker to finish. :type task_queue: multiprocessing.Queue :type result_queue: multiprocessing.Queue :type timer: multiprocessing.Value """ name = current_process().name logger.info(_(f"{name} ready for work at {current_process().pid}")) task_count = 0 if timeout is None: timeout = -1 # Start reading the task queue for task in iter(task_queue.get, "STOP"): result = None timer.value = -1 # Idle task_count += 1 # Get the function from the task logger.info(_(f'{name} processing [{task["name"]}]')) f = task["func"] # if it's not an instance try to get it from the string if not callable(task["func"]): try: module, func = f.rsplit(".", 1) m = importlib.import_module(module) f = getattr(m, func) except (ValueError, ImportError, AttributeError) as e: result = (e, False) if error_reporter: error_reporter.report() # We're still going if not result: close_old_django_connections() timer_value = task.pop("timeout", timeout) # signal execution pre_execute.send(sender="django_q", func=f, task=task) # execute the payload timer.value = timer_value # Busy try: res = f(*task["args"], **task["kwargs"]) result = (res, True) except Exception as e: result = (f"{e} : {traceback.format_exc()}", False) if error_reporter: error_reporter.report() if task.get("sync", False): raise with timer.get_lock(): # Process result task["result"] = result[0] task["success"] = result[1] task["stopped"] = timezone.now() result_queue.put(task) timer.value = -1 # Idle # Recycle if task_count == Conf.RECYCLE or rss_check(): timer.value = -2 # Recycled break logger.info(_(f"{name} stopped doing work"))
def parallel_proc(data):
    s, y = data
    print('{} began work'.format(multiprocessing.current_process().name))
    n, x, x_hat, e, ao, F, Ao = lms_ic(3, s, y, mu=0.01)
    print('{} finished'.format(multiprocessing.current_process().name))
    return ao
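# A sketch of fanning parallel_proc out over a Pool. The random (s, y) pairs below are
# hypothetical placeholders; lms_ic is assumed to be available from the snippet's own
# module, so this is illustrative rather than a drop-in driver.
import multiprocessing
import numpy as np

if __name__ == '__main__':
    data = [(np.random.randn(1000), np.random.randn(1000)) for _ in range(4)]
    with multiprocessing.Pool(processes=4) as pool:
        filters = pool.map(parallel_proc, data)
    print(len(filters), 'adaptive filters estimated')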
def get_duplication_breakpoints(age_records, window=20, max_endpoint_dist=10, start=0, pad=500, dist_to_expected_bp=400): func_logger = logging.getLogger( "%s-%s" % (get_deletion_breakpoints.__name__, multiprocessing.current_process())) potential_breakpoints = [] for age_record in age_records: left_end_near_l_bp = filter( lambda x: check_closeness_to_bp(min(age_record.start1_end1s[ x]), pad, dist_to_expected_bp, "L"), [0, 1]) right_end_near_r_bp = filter( lambda x: check_closeness_to_bp(max(age_record.start1_end1s[ x]), pad, dist_to_expected_bp, "R", age_record.inputs[0].length ), [0, 1]) if (not left_end_near_l_bp) or (not right_end_near_r_bp): func_logger.info('Not close to expected BPs: %s' % str(age_record)) continue if len(left_end_near_l_bp) == 2 and len(right_end_near_r_bp) == 1: left_end_near_l_bp = list( set(left_end_near_l_bp) - set(right_end_near_r_bp)) elif len(left_end_near_l_bp) == 1 and len(right_end_near_r_bp) == 2: right_end_near_r_bp = list( set(right_end_near_r_bp) - set(left_end_near_l_bp)) elif len(left_end_near_l_bp) == 2 and len(right_end_near_r_bp) == 2: dist_to_exp_l_bp = map( lambda x: abs(min(age_record.start1_end1s[x]) - pad), [0, 1]) dist_to_exp_r_bp = map( lambda x: abs( max(age_record.start1_end1s[x]) - (age_record.inputs[0].length - pad)), [0, 1]) left_end_near_l_bp, right_end_near_r_bp = [[0], [ 1 ]] if (dist_to_exp_l_bp[0] + dist_to_exp_r_bp[1]) < ( dist_to_exp_l_bp[1] + dist_to_exp_r_bp[0]) else [[1], [0]] l_interval = left_end_near_l_bp[0] r_interval = right_end_near_r_bp[0] bp_idx_l = 0 if age_record.start1_end1s[l_interval][ 0] < age_record.start1_end1s[l_interval][1] else 1 bp_idx_r = 1 if age_record.start1_end1s[r_interval][ 0] < age_record.start1_end1s[r_interval][1] else 0 if abs(age_record.start2_end2s[l_interval][bp_idx_l] - age_record.start2_end2s[r_interval][bp_idx_r]) > 10: func_logger.info('BPs do not match in seq2: %s' % str(age_record)) continue end_l_seq2 = age_record.start2_end2s[l_interval][1 - bp_idx_l] end_r_seq2 = age_record.start2_end2s[r_interval][1 - bp_idx_r] if max( min(end_r_seq2, end_l_seq2), min(end_l_seq2 - age_record.inputs[1].length, end_r_seq2 - age_record.inputs[1].length)) > max_endpoint_dist: func_logger.info( 'End points are too close to borders in Seq2: %s' % str(age_record)) continue potential_breakpoints += [ age_record.start1_end1s[l_interval][bp_idx_l], age_record.start1_end1s[r_interval][bp_idx_r] ] potential_breakpoints = sorted(potential_breakpoints) breakpoints = [] for breakpoint in potential_breakpoints: if min([window + 1] + [abs(b - breakpoint) for b in breakpoints]) >= window: breakpoints.append(breakpoint) func_logger.info("Gathered breakpoints as %s" % (str(breakpoints))) return [start + breakpoint for breakpoint in breakpoints]
def do_something(self): proc_name = multiprocessing.current_process().name print 'Doing something fancy in %s for %s!' % (proc_name, self.name)
def get_insertion_breakpoints(age_records, intervals, expected_bp_pos, window=AGE_WINDOW_SIZE, start=0, dist_to_expected_bp=50): func_logger = logging.getLogger("%s-%s" % (get_insertion_breakpoints.__name__, multiprocessing.current_process())) bedtools_intervals = [ pybedtools.Interval("1", interval[0], interval[1]) for interval in sorted(intervals) ] func_logger.info("bedtools_intervals %s" % (str(bedtools_intervals))) if not bedtools_intervals: return [] potential_breakpoints = sorted( list( set([interval.start for interval in bedtools_intervals] + [interval.end for interval in bedtools_intervals]))) breakpoints = [] for breakpoint in potential_breakpoints[1:-1]: # Check if the breakpoint is within window distance of a validated breakpoint if min([window + 1] + [abs(b[0] - breakpoint) for b in breakpoints]) <= window: continue func_logger.info("\tExamining potential breakpoint %d for support" % breakpoint) left_support = [ interval[0] for interval in intervals if abs(interval[0] - breakpoint) <= window ] right_support = [ interval[1] for interval in intervals if abs(interval[1] - breakpoint) <= window ] counter_examples = [ age_record for age_record in age_records if age_record.has_long_ref_flanks() and ( age_record.has_ref_deletion(window) or age_record.has_insertion(min_diff=20, max_diff=49)) and age_record.breakpoint_match(breakpoint, window) ] if counter_examples: counter_example_ends = [ age_record.start1_end1s for age_record in counter_examples ] func_logger.info("\t\tSkipping breakpoint %d due to %s" % (breakpoint, str(counter_example_ends))) continue if left_support: func_logger.info("\t\tLeft support %s" % (str(left_support))) if right_support: func_logger.info("\t\tRight support %s" % (str(right_support))) if (left_support and right_support) and min( [window + 1] + [abs(b[0] - breakpoint) for b in breakpoints]) > window: both_support = [ age_record for age_record in age_records if age_record.has_insertion(min_diff=50, max_diff=1000000000) and age_record.breakpoint_match(breakpoint, window) ] if both_support: func_logger.info("\t\tboth_support = %s" % (str(both_support))) func_logger.info("\t\tinsertion lengths = %s" % (str([ age_record.insertion_length() for age_record in both_support ]))) insertion_length = max( [0] + [age_record.insertion_length() for age_record in both_support]) insertion_sequence = both_support[0].get_insertion_sequence( ) if both_support else "." func_logger.info("\t\tInsertion length = %d %s" % (insertion_length, insertion_sequence)) breakpoints.append( (breakpoint, insertion_length, insertion_sequence)) func_logger.info("Nonfiltered breakpoints as %s" % (str(breakpoints))) if len(breakpoints) > 1: breakpoints = filter( lambda x: min(abs(x[0] - expected_bp_pos[ 0]), abs(expected_bp_pos[1] - x[0])) < dist_to_expected_bp, breakpoints) func_logger.info("Gathered breakpoints as %s" % (str(breakpoints))) return [(start + b[0], b[1], b[2]) for b in breakpoints]
def collector_process(collector, metric_queue, log):
    """
    """
    proc = multiprocessing.current_process()
    if setproctitle:
        setproctitle('%s - %s' % (getproctitle(), proc.name))

    signal.signal(signal.SIGALRM, signal_to_exception)
    signal.signal(signal.SIGHUP, signal_to_exception)
    signal.signal(signal.SIGUSR2, signal_to_exception)

    interval = float(collector.config['interval'])

    log.debug('Starting')
    log.debug('Interval: %s seconds', interval)

    # Validate the interval
    if interval <= 0:
        log.critical('interval of %s is not valid!', interval)
        sys.exit(1)

    # Start the next execution at the next window plus some stagger delay to
    # avoid having all collectors running at the same time
    next_window = math.floor(time.time() / interval) * interval
    stagger_offset = random.uniform(0, interval - 1)

    # Allocate time till the end of the window for the collector to run. With a
    # minimum of 1 second
    max_time = int(max(interval - stagger_offset, 1))
    log.debug('Max collection time: %s seconds', max_time)

    # Set up stderr/stdout as /dev/null so random print statements in third
    # party libs do not fail and prevent collectors from running.
    # https://github.com/BrightcoveOS/Diamond/issues/722
    sys.stdout = open(os.devnull, 'w')
    sys.stderr = open(os.devnull, 'w')

    while (True):
        try:
            time_to_sleep = (next_window + stagger_offset) - time.time()
            if time_to_sleep > 0:
                time.sleep(time_to_sleep)
            elif time_to_sleep < 0:
                # clock has jumped, lets skip missed intervals
                next_window = time.time()

            next_window += interval

            # Ensure collector run times fit into the collection window
            signal.alarm(max_time)

            # Collect!
            collector._run()

            # Success! Disable the alarm
            signal.alarm(0)

        except SIGALRMException:
            log.error('Took too long to run! Killed!')

            # Adjust the stagger_offset to allow for more time to run the
            # collector
            stagger_offset = stagger_offset * 0.9

            max_time = int(max(interval - stagger_offset, 1))
            log.debug('Max collection time: %s seconds', max_time)

        except SIGHUPException:
            # Reload the config if requested
            # We must first disable the alarm as we don't want it to interrupt
            # us and end up with half a loaded config
            signal.alarm(0)

            log.info('Reloading config due to HUP')
            collector.load_config()
            log.info('Config reloaded')

        except Exception:
            log.exception('Collector failed!')
            break
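The loop above depends on helpers that are not shown here (signal_to_exception, SIGALRMException, SIGHUPException). A hypothetical sketch of how such a handler could turn signals into exceptions so the except clauses in the loop can catch them:

import signal

# Hypothetical stand-ins for the helpers referenced above.
class SIGALRMException(Exception):
    pass

class SIGHUPException(Exception):
    pass

def signal_to_exception(signum, frame):
    # Raise a distinct exception per signal so the collector loop can tell
    # "ran too long" (SIGALRM) apart from "reload config" (SIGHUP).
    if signum == signal.SIGALRM:
        raise SIGALRMException()
    if signum == signal.SIGHUP:
        raise SIGHUPException()
    raise Exception('Unexpected signal %d' % signum)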
def process(self, parms):
    print "Processor : ", os.getppid(), os.getpid(), parms, multiprocessing.current_process().name
    return ""
def calculate(func, args):
    result = func(*args)
    print("%s says %s%s=%d" % (current_process(), func.__name__, repr(args), result))
    return result
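A minimal driver for calculate, assuming it is importable by the pool workers; operator.mul and operator.add are used so func.__name__ resolves to a readable name:

from multiprocessing import Pool
from operator import add, mul

if __name__ == '__main__':
    TASKS = [(mul, (i, 7)) for i in range(3)] + [(add, (i, 100)) for i in range(3)]
    with Pool(processes=4) as pool:
        # each apply_async call runs calculate(func, args) in some pool worker
        results = [pool.apply_async(calculate, task) for task in TASKS]
        for r in results:
            r.get()  # re-raises any worker-side exception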
def do_something(self): proc_name = multiprocessing.current_process().name logging.debug(f'Doing something fancy in {proc_name} for {self.name}')
def work1():
    for i in range(10):
        print("------1 child process:", multiprocessing.current_process().pid)
        # get the parent process pid
        print("------2 child process:", os.getppid())
        time.sleep(0.5)
import socket
import ssl
import multiprocessing

shost = "192.168.16.129"
sport = 10000  # port must be an int for socket.bind()
Socket_Server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Socket_Server_Address = (shost, sport)
Socket_Server.bind(Socket_Server_Address)
Socket_Server.listen(100)
TLS_socket = ssl.wrap_socket(
    Socket_Server,
    certfile='/home/sarvesh/NII/Certificate/selfsigned.crt',
    keyfile='/home/sarvesh/NII/Certificate/selfsigned.key',
    ssl_version=ssl.PROTOCOL_SSLv23)
Server_PID = multiprocessing.current_process().pid
print("[+] Starting Server (SSL) => " + shost + ":" + str(sport) + " || PID:" + str(Server_PID))
Connection = None
while True:
    try:
        (Connection, agentAddress) = TLS_socket.accept()
        print("[+] Client Connected: " + agentAddress[0] + ":" + str(agentAddress[1]))
    except Exception:
        # accept/handshake failed; close any previously accepted connection
        if Connection is not None:
            Connection.close()
def func(msg): print(multiprocessing.current_process().name+'-'+msg)
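A sketch of how such a function is typically fanned out over a pool, so each printed line shows which pool worker handled the message:

import multiprocessing

if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=3)
    for i in range(5):
        pool.apply_async(func, ('message %d' % i,))
    pool.close()  # no more tasks will be submitted
    pool.join()   # wait for the workers to drain the queue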
def test(my_i, my_q): time.sleep(5) print my_i print my_q print mp.current_process() my_q.put((my_i * my_i, my_i))
def initializer(s): np.random.seed(s + current_process()._identity[0]) # using only 1 OpenMP thread os.environ['OMP_NUM_THREADS'] = "1"
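current_process()._identity[0] is the 1-based index of a pool worker (note it is an internal attribute), so passing a base seed through initargs gives every worker a distinct, reproducible NumPy seed. A sketch of the intended usage; sample_mean and the seed value are made up for illustration:

import numpy as np
from multiprocessing import Pool, current_process

def sample_mean(n):
    # each pool worker was seeded in initializer() with base_seed + its worker index
    return current_process().name, np.random.rand(n).mean()

if __name__ == '__main__':
    base_seed = 1234  # hypothetical base seed
    with Pool(processes=4, initializer=initializer, initargs=(base_seed,)) as pool:
        print(pool.map(sample_mean, [1000] * 4))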
import time, multiprocessing
import os


def work1():
    for i in range(10):
        print("------1 child process:", multiprocessing.current_process().pid)
        # get the parent process pid
        print("------2 child process:", os.getppid())
        time.sleep(0.5)


if __name__ == '__main__':
    w1_process = multiprocessing.Process(target=work1)
    w1_process.start()
    while 1:
        print("main process:", multiprocessing.current_process().pid)
        time.sleep(0.5)
def rendJitTriang(x, y, n, jsig, mcp, imageBounds, pixelSize, seeds=None, geometric_mean=True, mdh=None): """ Parameters ---------- x : ndarray x positions [nm] y : ndarray y positions [nm] n : number of jittered renderings to average into final rendering jsig : ndarray (or scalar float) standard deviations [nm] of normal distributions to sample when jittering for each point mcp : float Monte Carlo sampling probability (0, 1] imageBounds : PYME.IO.ImageBounds ImageBounds instance - range in each dimension should ideally be an integer multiple of pixelSize. pixelSize : float size of pixels to be rendered [nm] seeds : ndarray [optional] supplied seeds if we want to strictly reconstruct a previously generated image geometric_mean : bool [optional] Flag to scale intensity by geometric mean (True) or [localizations / um^2] (False) mdh: PYME.IO.MetaDataHandler.MDHandlerBase or subclass [optional] metadata handler to store seeds to Returns ------- im : ndarray 2D Jittered Triangulation rendering. Notes ----- Triangles which reach outside of the image bounds are dropped and not included in the rendering. """ sizeX = int((imageBounds.x1 - imageBounds.x0) / pixelSize) sizeY = int((imageBounds.y1 - imageBounds.y0) / pixelSize) if geometric_mean: fcn = _rend_jit_tri_geometric else: fcn = rendJitTri if multiProc and not multiprocessing.current_process().daemon: im = shmarray.zeros((sizeX, sizeY)) x = shmarray.create_copy(x) y = shmarray.create_copy(y) if type(jsig) == numpy.ndarray: jsig = shmarray.create_copy(jsig) # We will generate 1 process for each seed, defaulting to generating a seed for each CPU core if seeds are not # passed explicitly. Rendering with explicitly passed seeds will be deterministic, but performance will not be # optimal unless n_seeds = n_CPUs seeds = _generate_subprocess_seeds(multiprocessing.cpu_count(), mdh, seeds) iterations = _iterations_per_task(n, len(seeds)) processes = [ multiprocessing.Process(target=fcn, args=(im, x, y, jsig, mcp, imageBounds, pixelSize, nIt, s)) for nIt, s in zip(iterations, seeds) ] for p in processes: p.start() for p in processes: p.join() else: im = numpy.zeros((sizeX, sizeY)) # Technically we could just call fcn( ....,n), but we replicate the logic above and divide into groups of tasks # so that we can reproduce a previously generated image seeds = _generate_subprocess_seeds(1, mdh, seeds) iterations = _iterations_per_task(n, len(seeds)) for nIt, s in zip(iterations, seeds): # NB - in normal usage, this loop only evaluates once, with nIt=n fcn(im, x, y, jsig, mcp, imageBounds, pixelSize, nIt, seed=s) if geometric_mean: return (1.e6 / (im / n + 1)) * (im > n) else: return im / n
def download_link(tup):
    link = tup[0]
    output_path = tup[1]
    num_workers = tup[2]
    page_id = str(uuid.uuid4())
    url_no_header = None
    try:
        # Find the Wayback Machine link
        if not wayback_prefix.match(link):
            link_encoded = urllib.parse.quote(link)
            available, availability_attempt = False, 0
            # Sometimes the API returns HTTP success code 200, but archived_snapshots
            # reports the page as unavailable even though it actually is available.
            # Give it a total of three tries.
            while not available and availability_attempt < 3:
                response = download_with_retry(
                    f'http://archive.org/wayback/available?url={link_encoded}&timestamp=20191127'
                )
                json_response = response.json()
                available = 'closest' in json_response['archived_snapshots']
                availability_attempt += 1
            if not available:
                logging.warning(
                    f'Not available on Wayback Machine: {link}, HTTP code {response.status_code}, {json_response}'
                )
                return {'link': link, 'page_id': page_id, 'available': False}
            url = json_response['archived_snapshots']['closest']['url']
        else:
            url = link

        match = replace_pattern.search(url)
        assert match
        url_no_header = replace_pattern.sub(f'{match.group(1)}id_', url)
        response = download_with_retry(url_no_header)
        html_page = response.text
        parsed_text = extract_text(html_page)

        proc = multiprocessing.current_process()
        pid_mod = str(proc.pid % num_workers)
        (output_path / pid_mod).mkdir(parents=True, exist_ok=True)
        with open(output_path / pid_mod / page_id, 'w') as f:
            doc = {
                'id': url_no_header,
                'contents': parsed_text,
            }
            f.write(json.dumps(doc) + '\n')

        return {
            'link': link,
            'page_id': page_id,
            'available': True,
            'status_code': response.status_code,
            'wayback_url': url_no_header,
        }
    except HTTPError as http_err:
        logging.warning(f'HTTP error occurred: {http_err} for {link}')
        return {
            'link': link,
            'page_id': page_id,
            'available': False,
            'status_code': http_err.response.status_code if http_err.response else None,
            'wayback_url': url_no_header,
        }
    except UnicodeDecodeError as e:
        logging.warning(f'Unicode decode error occurred: {e} for {link}')
        return {
            'link': link,
            'page_id': page_id,
            'available': False,
            'status_code': response.status_code,
            'wayback_url': url_no_header,
        }
    except Exception as e:
        logging.warning(f'Exception occurred: {e} for {link}')
        return {
            'link': link,
            'page_id': page_id,
            'available': False,
            'status_code': None,
            'wayback_url': url_no_header,
        }
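The pid % num_workers sharding above suggests the function is meant to run inside a fixed-size worker pool. A hedged sketch of such a driver; links and the output directory are placeholders:

import multiprocessing
from pathlib import Path

if __name__ == '__main__':
    num_workers = 8
    out_dir = Path('downloads')  # placeholder output directory
    links = ['https://example.com/a', 'https://example.com/b']  # placeholder links
    tasks = [(link, out_dir, num_workers) for link in links]
    with multiprocessing.Pool(num_workers) as pool:
        for record in pool.imap_unordered(download_link, tasks):
            print(record['link'], record['available'])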
def main(argv):
    try:
        cron_logger.info('FOREST-CRON PROCESS CALLED')
        cron_logger.info(LOG_INDENT + 'Getting command line params ... ')
        process = get_command_line_params(argv)
        if 'debug' in process:
            cron_logger.info(LOG_INDENT + 'Running in debug mode ... ')
        cron_logger.info(LOG_INDENT + 'Calling process handler ... ')
        _Processes_Handler.execute(process)
    except Already_Handled_Exception as already_handled_exception:
        raise already_handled_exception
    except Exception as e:
        cron_logger.critical(e.message)
        already_handled_exception = Already_Handled_Exception(e.message)
        return already_handled_exception

# Start a process execution:
if __name__ == '__main__':
    # Main process name:
    multiprocessing.current_process().name = 'cron'
    # Set all loggers:
    set_all_loggers()
    # Init:
    cron_logger = _Utilities.get_logger('cron')
    cron_logger.info(' ')
    cron_logger.info(_Constants.LOG_SEPARATOR)
    # Process params:
    process = 'synchronization_layer_1'
    process_params = {'process': process}  # End of process_params
    # Execution:
    main(sys.argv[1:])
def rendJitTet(x, y, z, n, jsig, jsigz, mcp, imageBounds, pixelSize, sliceSize=100): # FIXME - signature now differs from visHelpersMin #import gen3DTriangs sizeX = int((imageBounds.x1 - imageBounds.x0) / pixelSize) sizeY = int((imageBounds.y1 - imageBounds.y0) / pixelSize) sizeZ = int((imageBounds.z1 - imageBounds.z0) / sliceSize) # convert from [nm] to [pixels] x = (x - imageBounds.x0) / pixelSize y = (y - imageBounds.y0) / pixelSize z = (z - imageBounds.z0) / sliceSize jsig = jsig / pixelSize jsigz = jsigz / sliceSize if multiProc and not multiprocessing.current_process().daemon: im = shmarray.zeros((sizeX, sizeY, sizeZ), order='F') x = shmarray.create_copy(x) y = shmarray.create_copy(y) z = shmarray.create_copy(z) if type(jsig) == numpy.ndarray: jsig = shmarray.create_copy(jsig) if type(jsigz) == numpy.ndarray: jsigz = shmarray.create_copy(jsigz) nCPUs = multiprocessing.cpu_count() tasks = int(n / nCPUs) * numpy.ones(nCPUs, 'i') tasks[:int(n % nCPUs)] += 1 processes = [ multiprocessing.Process(target=rendJTet, args=(im, y, x, z, jsig, jsigz, mcp, nIt)) for nIt in tasks ] for p in processes: p.start() for p in processes: p.join() return im / n else: im = numpy.zeros((sizeX, sizeY, sizeZ), order='F') rendJTet(im, y, x, z, jsig, jsigz, mcp, n) return im / n
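The tasks array splits the n iterations as evenly as possible across the CPUs, with the first n % nCPUs workers taking one extra iteration each. For example:

import numpy

n, nCPUs = 10, 4
tasks = int(n / nCPUs) * numpy.ones(nCPUs, 'i')
tasks[:int(n % nCPUs)] += 1
print(tasks)  # [3 3 2 2] -- the 10 iterations split across 4 worker processes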
def my_service(): name = multiprocessing.current_process().name print name, 'Starting' time.sleep(3) print name, 'Exiting'
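The name printed by my_service is simply whatever name the Process object was given (or an auto-generated Process-N if none was passed). A short driver illustrating that:

import multiprocessing

if __name__ == '__main__':
    service = multiprocessing.Process(name='my_service', target=my_service)
    worker = multiprocessing.Process(target=my_service)  # gets a default name like Process-1
    worker.start()
    service.start()
    worker.join()
    service.join()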
def fisher_worker(task_q, result_q, outp): while True: try: tables, nth_job = task_q.get() ColorText().info( "[poolseq_tk]: %s running Fisher's Exact test on %d tables ...\n" % (mp.current_process().name, len(tables)), "stderr") tmpFile = outp + "." + mp.current_process().name + ".fisher" fOUT = open(tmpFile, 'w') pvals_split, odds_ratios_split = {}, {} nTests = 0 for k in sorted(tables.iterkeys()): oddsr = 0.0 chr = k[0] pos = k[1] alt_base = tables[k][2] ref_base = tables[k][1] ref_ac1 = int(tables[k][3]) alt_ac1 = int(tables[k][4]) ref_ac2 = int(tables[k][5]) alt_ac2 = int(tables[k][6]) if (sum(map(int, tables[k][3:7])) >= 10 and alt_ac1 + ref_ac1 >= 5 and # row subtotals alt_ac2 + ref_ac2 >= 5 and alt_ac1 + alt_ac2 >= 5 and # column subtotals ref_ac1 + ref_ac2 >= 5): nTests += 1 if (ref_ac1 == 0 or ref_ac2 == 0 or # add pseudo counts in case alt_ac1 == 0 or alt_ac2 == 0): # odds ratio goes to Inf ref_ac1 += 1 ref_ac2 += 1 alt_ac1 += 1 alt_ac2 += 1 data_vector = robjects.IntVector( [ref_ac1, alt_ac1, ref_ac2, alt_ac2]) table = robjects.r['matrix'](data_vector, ncol=2) rfisher = robjects.r['fisher.test'](table, alternative='t') # pvals_split[pos] = float(rfisher[0][0]) # if (ref_ac1 == 0 or ref_ac2 == 0 or # alt_ac1 == 0 or alt_ac2 == 0): # oddsr = (float(ref_ac1+1)/(alt_ac1+1))/(float(ref_ac2+1)/(alt_ac2+1)) # else: pvalue = float(rfisher[0][0]) oddsr = rfisher[2][0] # odds_ratios_split[pos] = oddsr if pvalue == 0.0: fOUT.write("%s\t%d\t%.4g\t%.8f\tInf\n" % (chr, pos, pvalue, oddsr)) elif pvalue == 1.0: fOUT.write("%s\t%d\t%.4g\t%.8f\t0.00000000\n" % (chr, pos, pvalue, oddsr)) else: fOUT.write( "%s\t%d\t%.8f\t%.8f\t%.8f\n" % (chr, pos, pvalue, oddsr, -1 * math.log10(pvalue))) fOUT.close() ColorText().info( "[poolseq_tk]: %s ran %d tests\n" % (mp.current_process().name, nTests), "stderr") result_q.put(tmpFile) # result_q.put((pvals_split, odds_ratios_split)) finally: task_q.task_done()