def process_results(self, data, re_test=None):
    results = []
    if data['status'] != 'success':
        ADDON.log(data)
        return []
    gc.disable()
    for result in data['result']:
        title = self.normalize(result['title'])
        if re_test:
            if re_test.search(title) is None:
                continue
        sourcetitle = self.normalize(result['sourcetitle'])
        extension = result['extension']
        size = result['sizeinternal']
        host_name = result['hostername']
        hosts = result['hosterurls']
        if extension == 'rar':
            continue
        for host in hosts:
            if self.filter_host(host_name):
                url = "%s://%s" % (self.service, host['url'])
                quality = self.test_quality(title + sourcetitle + self.normalize(url))
                # use a distinct name so the outer loop variable is not shadowed
                scraper_result = ScraperResult(self.debrid_hosts, self.service, host_name, url, title)
                scraper_result.quality = quality
                scraper_result.size = int(size)
                scraper_result.extension = extension
                results += [scraper_result]
    gc.enable()
    return results
def test(self, view):
    """
    Calls the given view and measures the time for it to return. The
    garbage collector is disabled during execution.
    """
    gc_old = gc.isenabled()
    gc.disable()
    try:
        start = timeit.default_timer()
        if view.method == 'GET':
            response = self.client.get(view.url, view.data)
        elif view.method == 'POST':
            response = self.client.post(view.url, view.data)
        else:
            raise ValueError('Unknown view method: %s' % view.method)
        end = timeit.default_timer()

        # Return result in milliseconds
        time_ms = (end - start) * 1000

        # Try to get version information
        version = subprocess.check_output(['git', 'describe'])

        from .models import TestResult
        return TestResult(view=view, time=time_ms, result=response,
                          result_code=response.status_code, version=version)
    finally:
        if gc_old:
            gc.enable()
def test_trashcan(self):
    class Ouch:
        n = 0
        def __del__(self):
            Ouch.n = Ouch.n + 1
            if Ouch.n % 17 == 0:
                gc.collect()

    # "trashcan" is a hack to prevent stack overflow when deallocating
    # very deeply nested tuples etc.  It works in part by abusing the
    # type pointer and refcount fields, and that can yield horrible
    # problems when gc tries to traverse the structures.
    # If this test fails (as it does in 2.0, 2.1 and 2.2), it will
    # most likely die via segfault.

    # Note:  In 2.3 the possibility for compiling without cyclic gc was
    # removed, and that in turn allows the trashcan mechanism to work
    # via much simpler means (e.g., it never abuses the type pointer or
    # refcount fields anymore).  Since it's much less likely to cause a
    # problem now, the various constants in this expensive (we force a lot
    # of full collections) test are cut back from the 2.2 version.
    gc.enable()
    N = 150
    for count in range(2):
        t = []
        for i in range(N):
            t = [t, Ouch()]
        u = []
        for i in range(N):
            u = [u, Ouch()]
        v = {}
        for i in range(N):
            v = {1: v, 2: Ouch()}
    gc.disable()
def VTiter(self, *parsedArgs, **envars):
    largs, dictargs = self.full_parse(parsedArgs)
    if 'query' not in dictargs:
        raise functions.OperatorError(__name__.rsplit('.')[-1], "No query argument ")
    query = dictargs['query']
    cur = envars['db'].cursor()
    q = cur.execute(query, parse=False)
    try:
        yield list(cur.getdescriptionsafe())
    except StopIteration:
        try:
            raise
        finally:
            try:
                cur.close()  # the original referenced an undefined name `c` here
            except:
                pass
    gc.disable()
    try:
        while True:
            yield q.next()
    finally:
        # re-enable GC when the result iterator is exhausted; in the original the
        # enable call sat after the infinite loop and could never be reached
        gc.enable()
def test_get_stats(self):
    stats = gc.get_stats()
    self.assertEqual(len(stats), 3)
    for st in stats:
        self.assertIsInstance(st, dict)
        self.assertEqual(set(st),
                         {"collected", "collections", "uncollectable"})
        self.assertGreaterEqual(st["collected"], 0)
        self.assertGreaterEqual(st["collections"], 0)
        self.assertGreaterEqual(st["uncollectable"], 0)
    # Check that collection counts are incremented correctly
    if gc.isenabled():
        self.addCleanup(gc.enable)
        gc.disable()
    old = gc.get_stats()
    gc.collect(0)
    new = gc.get_stats()
    self.assertEqual(new[0]["collections"], old[0]["collections"] + 1)
    self.assertEqual(new[1]["collections"], old[1]["collections"])
    self.assertEqual(new[2]["collections"], old[2]["collections"])
    gc.collect(2)
    new = gc.get_stats()
    self.assertEqual(new[0]["collections"], old[0]["collections"] + 1)
    self.assertEqual(new[1]["collections"], old[1]["collections"])
    self.assertEqual(new[2]["collections"], old[2]["collections"] + 1)
def main_measure(data, dataname, agentClassGenerator, params):
    X_POINTS = params[scp.X_POINTS]
    STEP = params[scp.STEP]
    NUM_FOLDS = params[scp.NUM_FOLDS]
    CLASSIFY_TIME = params[scp.CLASSIFY_TIME]
    LEARN_TIME = params[scp.LEARN_TIME]
    SEED = params[scp.SEED]
    num_features_arr = [i * STEP for i in range(1, X_POINTS + 1)]
    print "\n============= Learning Curve ==================="
    print "Evaluating", dataname
    print "num_features,", "accuracy%,"
    results = []
    for num_features in num_features_arr:
        agentClass = agentClassGenerator(num_features)
        try:
            gc.disable()
            confusion = AgentAnalyzer().run_one(data, agentClass, CLASSIFY_TIME, LEARN_TIME,
                                                num_folds=NUM_FOLDS, seed=SEED)
            gc.enable()
            gc.collect()
            idf = s_common.idf(NUM_FOLDS, data, num_features)
            results.append((num_features, confusion, idf))
            print num_features, ',', confusion.getAccuracyStr()
        except Exception, e:
            print "Error:", e
            print " Possible Timeout for", num_features
def getlines(a_filename):
    # it gives chunks
    fin = None
    if a_filename == '-':
        fin = sys.stdin
    else:
        fin = open(a_filename, 'r')
    header = dict()
    first = True
    while True:
        lines = fin.readlines(10**8)
        if not lines:
            break
        gc.disable()
        lines = [line.rstrip('\r\n').split('\t') for line in lines if line.rstrip('\r\n')]
        gc.enable()
        for line in lines:
            if line[0].startswith('@'):
                if line[0].startswith('@SQ') and line[1].startswith('SN:') and line[2].startswith('LN:'):
                    k = line[1][3:]
                    v = int(line[2][3:])
                    header[k] = v
                else:
                    pass
            else:
                if first:
                    first = False
                    yield header
                    header = None
                yield line
    if first and header:
        yield header
    fin.close()
def _exitfunc(cls):
    # At shutdown invoke finalizers for which atexit is true.
    # This is called once all other non-daemonic threads have
    # been joined.
    reenable_gc = False
    try:
        if cls._registry:
            import gc
            if gc.isenabled():
                reenable_gc = True
                gc.disable()
            pending = None
            while True:
                if pending is None or finalize._dirty:
                    pending = cls._select_for_exit()
                    finalize._dirty = False
                if not pending:
                    break
                f = pending.pop()
                try:
                    # gc is disabled, so (assuming no daemonic
                    # threads) the following is the only line in
                    # this function which might trigger creation
                    # of a new finalizer
                    f()
                except Exception:
                    sys.excepthook(*sys.exc_info())
                assert f not in cls._registry
    finally:
        # prevent any more finalizers from executing during shutdown
        finalize._shutdown = True
        if reenable_gc:
            gc.enable()
def load(self):
    try:
        env = Environment.Environment(os.path.join(self.cachedir, "build.config.py"))
    except (IOError, OSError):
        pass
    else:
        if env["version"] < HEXVERSION:
            raise Utils.WafError("Version mismatch! reconfigure the project")
        for t in env["tools"]:
            self.setup(**t)
    try:
        gc.disable()
        f = data = None
        Node.Nodu = self.node_class
        try:
            f = open(os.path.join(self.bdir, DBFILE), "rb")
        except (IOError, EOFError):
            pass
        try:
            if f:
                data = cPickle.load(f)
        except AttributeError:
            if Logs.verbose > 1:
                raise
        if data:
            for x in SAVED_ATTRS:
                setattr(self, x, data[x])
        else:
            debug("build: Build cache loading failed")
    finally:
        if f:
            f.close()
        gc.enable()
def reads_from_fastq_file(file_name, size_read_buffer=10**8):
    fid = None
    if file_name == '-':
        fid = sys.stdin
    elif file_name.lower().endswith('.gz'):
        fid = gzip.open(file_name, 'r')
    else:
        fid = open(file_name, 'r')
    piece = [None, None, None, None]
    ij = 0
    while True:
        gc.disable()
        lines = fid.readlines(size_read_buffer)
        gc.enable()
        if not lines:
            break
        for line in lines:
            ij = ij + 1
            piece[ij - 1] = line
            if ij == 4:
                bucket = (piece[0].rstrip('\r\n')[1:],
                          piece[1].rstrip('\r\n'),
                          piece[3].rstrip('\r\n'))
                yield bucket
                piece = [None, None, None, None]
                ij = 0
    fid.close()
def testNoReferenceCyclesAfterCall(self):

    class ChildNetwork(network.Network):

        def __init__(self, name=None):
            super(ChildNetwork, self).__init__(name=name)

        def call(self, x):
            return x * 2.

    class ParentNetwork(network.Network):

        def __init__(self, name=None):
            super(ParentNetwork, self).__init__(name=name)
            self.l1 = self.track_layer(ChildNetwork())

        def call(self, x):
            return self.l1(x)

    one = constant_op.constant([[1.0]])
    gc.disable()
    gc.collect()
    previous_gc_debug_flags = gc.get_debug()
    gc.set_debug(gc.DEBUG_SAVEALL)
    preexisting = len(gc.garbage)
    net = ParentNetwork()
    net(one)
    del net
    gc.collect()
    # There should be no additional garbage requiring collection.
    self.assertEqual(preexisting, len(gc.garbage))
    gc.set_debug(previous_gc_debug_flags)
    gc.enable()
def reads_from_fastq_file(f_name, size_read_buffer=10**8):
    fid = None
    if f_name == '-':
        fid = sys.stdin
    elif f_name.lower().endswith('.gz'):
        fid = gzip.open(f_name, 'r')
    else:
        fid = open(f_name, 'r')
    j = 0
    p1 = None
    p2 = None
    while True:
        gc.disable()
        lines = fid.readlines(size_read_buffer)
        gc.enable()
        if not lines:
            break
        for a_line in lines:
            j = j + 1
            if j == 1:
                p1 = a_line
            elif j == 2:
                p2 = a_line
            elif j == 4:
                yield (p1, p2, a_line)
                p1 = None
                p2 = None
                j = 0
    fid.close()
def add_line(self, line):
    gc.disable()
    self.data.append(line)
    gc.enable()
    self.size = self.size + len(line)
    if self.size > self.size_buffer:
        self.__write_buffer()
def main():
    for key_file in glob.glob("*.pem"):
        with open(key_file, "r") as f:
            private_key = f.read()

        # Check that the signatures match.
        results = []
        for name, func in get_signature.items():
            if not available[name]:
                continue
            results.append((name, func(private_key, "foo bar")))
        print "{} using {}:".format(key_file, ", ".join(r[0] for r in results))
        signatures = dict((r[1], True) for r in results).keys()
        if len(signatures) == 1:
            print " EQUAL"
        else:
            print " NOT EQUAL"

        # Simple benchmark.
        iters = 500
        s = get_random_string(500)
        for name, func in get_signature.items():
            if not available[name]:
                continue
            print "running {} iterations of {}".format(iters, name)
            gc.disable()
            tic = time.time()
            for i in range(iters):
                func(private_key, s)
            toc = time.time()
            gc.enable()
            print " took {:.3f}s".format(toc - tic)
        print
def _main():
    """ main loop """
    user = None
    GAME_SELECT_DELAY = .4
    while True:
        gc.disable()
        hardware.reset()
        if user is None:
            user = persistence.get_anonymous()
        # do game selection by good/bad light
        hardware.write_message("Waiting for a game selection",
                               " Choose 1 - %d" % len(games)).\
            display_characters('H', 'I')
        select = hardware.select_by_lights(len(games), 9)
        if select == 9:
            for i in xrange(5):
                hardware.display_characters('B', 'Y')\
                    .wait(.3)\
                    .display_characters(' ', ' ')\
                    .wait(.2)
            hardware.wait(1)\
                .cleanup()
            exit()
        # game picked, construct it
        (name, description, levels, author, date, ver) = games[select - 1].GameInfo()
        game = games[select - 1]()
        level = 1
        if levels > 1:
            hardware.display_characters('L', 'E')
            level = hardware.select_by_lights(levels, 9)
            if level == 9:
                continue
        hardware.display_number(0)
        game.initialize(hardware, user, level)
        hardware.write_message("Playing game>", name)
        hardware.write_debug(description, 'by', author)
        score = Score().load_at_start(name, ver, level, user)
        persistence.save_score_start(score, user)
        start = time.time()
        score.score = game.play()
        score.duration_sec = time.time() - start
        persistence.save_score_end(score, user)
        hardware.beep(2, .5)
        hardware.blink_light_until_button(5)
        gc.enable()
        gc.collect()
def load(self, file):
    # "file" could also be a socket
    gc.disable()
    try:
        return pickle.load(file)
    finally:
        gc.enable()
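# The load() above is a common pattern: unpickling a large object graph allocates many
# container objects, and with the collector enabled each threshold crossing re-scans the
# growing young generation. A minimal, hypothetical context-manager form of the same idea
# (the names gc_paused/load_pickle are illustrative, not taken from the snippet above):
from contextlib import contextmanager
import gc
import pickle

@contextmanager
def gc_paused():
    """Temporarily disable cyclic GC, restoring the previous state on exit."""
    was_enabled = gc.isenabled()
    gc.disable()
    try:
        yield
    finally:
        if was_enabled:
            gc.enable()

def load_pickle(path):
    with gc_paused(), open(path, 'rb') as f:
        return pickle.load(f)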
def loadblk(self, blk, buf):
    # we are in a sighandler - establish a cycle which also references obj_4del
    # and trigger a full GC
    assert self.obj_4del is not None
    w = weakref.ref(self.obj_4del)
    assert w() is self.obj_4del

    # establish cycle with leaf ref to obj_4del
    a = C()
    b = C()
    a.b = b
    b.a = a
    a.obj_4del = self.obj_4del
    self.obj_4del = None
    assert w() is not None

    # del a=b cycle - it should stay alive, while gc is disabled
    gc_save = gc.isenabled()
    gc.disable()
    del a, b
    assert w() is not None

    # gc - a=b and obj_4del collected
    gc.collect()
    assert w() is None

    if gc_save:
        gc.enable()

    self.marker_list.append(2)
def f(*k, **kw):
    try:
        gc.disable()
        ret = fun(*k, **kw)
    finally:
        gc.enable()
    return ret
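# This wrapper (and the two similar ones further down) follows the same
# decorate-and-restore pattern. A minimal sketch of a reusable decorator, assuming only
# the standard library; unlike the plain wrappers it preserves the wrapped function's
# metadata and only re-enables GC if it was enabled to begin with:
import functools
import gc

def no_gc(func):
    """Run *func* with cyclic GC paused, restoring the previous GC state afterwards."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        was_enabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if was_enabled:
                gc.enable()
    return wrapper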
def group_edges(cs):
    plus = []
    minus = []
    pairs = []
    gc.disable()
    interval = 1000
    for current, cl1 in enumerate(cs.clusters):
        if (current % interval) == 0:
            update_status(float(current) / len(cs.clusters), "Grouping all edges...")
        bib1 = tuple(cl1.bibs)[0]
        pointers = cl1.out_edges
        for bib2 in xrange(len(cl1.out_edges)):
            val = pointers[bib2]
            if val[0] not in Bib_matrix.special_numbers:
                if val[0] > edge_cut_prob:
                    pairs.append((bib1, bib2, val))
            elif val[0] == Bib_matrix.special_symbols['+']:
                plus.append((bib1, bib2))
            elif val[0] == Bib_matrix.special_symbols['-']:
                minus.append((bib1, bib2))
            else:
                assert val[0] == Bib_matrix.special_symbols[None], "Invalid Edge"
    update_status_final("Finished with the edge grouping.")
    bibauthor_print("Positive edges: %d, Negative edges: %d, Value edges: %d." %
                    (len(plus), len(minus), len(pairs)))
    gc.enable()
    return plus, minus, pairs
def next_seed(self):
    """ Load next seed from disk """
    seed = next(self._all_seeds)
    folder = os.path.join(self._root, str(seed), self._subset)
    self.data = []
    silence = None
    gc.disable()
    for filename in os.listdir(folder):
        command = os.path.splitext(os.path.basename(filename))[0]
        with open(os.path.join(folder, filename), "r") as pkl_file:
            audio = pickle.load(pkl_file)
        # Check for 'silence'
        if command == "silence":
            silence = audio
        else:
            target = self.classes.index(os.path.basename(command))
            self.data.extend(itertools.product(audio, [target]))
    gc.enable()
    target = self.classes.index("silence")
    self.data += [(silence, target)] * int(len(self.data) * self._silence_percentage)
    return seed
def timer(fxn, args):
    gc.disable()
    t1 = time.time()
    R = fxn(*args)
    t2 = time.time()
    gc.enable()
    return R, (t2 - t1)
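# Hypothetical usage of the timer() helper above (the workload is illustrative only):
# result, elapsed = timer(sorted, (list(range(10**6)),))
# print("sorted 1e6 ints in %.3fs" % elapsed)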
def test_for_mem_leak():
    from calibre.utils.mem import memory, gc_histogram, diff_hists
    import gc
    gc.disable()
    scanner = DeviceScanner()
    scanner.scan()
    memory()  # load the psutil library
    for i in xrange(3):
        gc.collect()
    for reps in (1, 10, 100, 1000):
        for i in xrange(3):
            gc.collect()
        h1 = gc_histogram()
        startmem = memory()
        for i in xrange(reps):
            scanner.scan()
        for i in xrange(3):
            gc.collect()
        usedmem = memory(startmem)
        prints('Memory used in %d repetitions of scan(): %.5f KB' % (reps, 1024 * usedmem))
        prints('Differences in python object counts:')
        diff_hists(h1, gc_histogram())
        prints()
def handoff_all():
    main_logger.info("Preparing to re-exec with handoffs")
    handoff_data = {}
    for name, manager in clients.items():
        data = manager.handoff()
        if data:
            handoff_data[name] = data
    main_logger.info("Final handoff data: {!r}".format(handoff_data))
    for manager in clients.values():
        manager.get()
    main_logger.info("All managers stopped")
    # TODO this will fail if the bytes contain invalid utf-8
    handoff_data_str = json.dumps(handoff_data)
    env = os.environ.copy()
    env['handoff_data'] = handoff_data_str
    main_logger.info("Calling execve({!r}, {!r}, {!r})".format(sys.executable, sys.argv, env))
    # critical section - absolutely no blocking calls beyond this point
    gc.disable()  # we don't want any destructors running
    open_fds = set(map(int, os.listdir('/proc/self/fd')))
    for fd in open_fds - {0, 1, 2} - set(data['fd'] for data in handoff_data.values()):
        try:
            os.close(fd)
        except OSError:
            pass  # this is probably EBADF, but even if it isn't we can't do anything about it
    os.execve(sys.executable, [sys.executable, '-m', 'ekimbot'] + sys.argv[1:], env)
def arrayFP(input):
    outfp = []
    for i in input:
        gc.disable()
        outfp.append(calcFingerprints(i[0]))
        gc.enable()
    return np.array(outfp, dtype=np.uint8)
def wrapped(self, *args, **kwargs):
    import gc
    gc.disable()
    gc.collect()
    deltas = []
    d = None
    try:
        while True:
            d = gettotalrefcount()
            method(self, *args, **kwargs)
            if hasattr(self, 'cleanup'):
                self.cleanup()
            if 'urlparse' in sys.modules:
                sys.modules['urlparse'].clear_cache()
            d = gettotalrefcount() - d
            deltas.append(d)
            if 2 <= len(deltas) <= 3 and deltas[-2:] == [0, 0]:
                break
            if 3 <= len(deltas) and deltas[-3:] == [0, 0, 0]:
                break
            if len(deltas) >= 6:
                raise AssertionError('refcount increased by %r' % (deltas,))
    finally:
        gc.collect()
        gc.enable()
def test_cyclic_dependency_withKeepRef(self):
    """Create 2 objects with a cyclic dependency, so that they can only be
    removed by the garbage collector, and then invoke the garbage collector
    in a different thread.
    """
    import gc

    class CyclicChildObject(ObjectView):

        def __init__(self, model):
            super(CyclicChildObject, self).__init__(None)
            self.setModel(model)

    class CyclicObject(ObjectModel):

        def __init__(self):
            super(CyclicObject, self).__init__()
            self._view = CyclicChildObject(self)

    # turn off automatic garbage collection, to be able to trigger it
    # at the 'right' time
    gc.disable()
    alive = lambda: sum(isinstance(o, CyclicObject) for o in gc.get_objects())
    #
    # first proof that the wizard is only destructed by the garbage
    # collector
    #
    cycle = CyclicObject()
    self.assertTrue(alive())
    del cycle
    self.assertTrue(alive())
    gc.collect()
    self.assertFalse(alive())
def import_data(filepath):
    conn = sqlite3.connect(filepath)
    c = conn.cursor()
    count = 0
    gc.disable()
    for d in c.execute('''
            select hyg.*, trek.name as trek, gal.x, gal.y, gal.z
            from tblhyg hyg
            inner join tblgalactic gal on gal.starid = hyg.starid
            left outer join tblstartrek trek on trek.starid = hyg.starid'''):
        starid, hip, hd, hr, gliese, bayerflam, proper, ra, dec, dist, mag,\
            absmag, spectrum, color, trek, x, y, z = d
        hd = 'HD%d' % hd if hd else None
        hip = 'HIP%d' % hip if hip else None
        hr = 'HR%d' % hr if hr else None
        name = proper or hd or hip or hr
        pos = vec3(x, y, z) * _km_in_ly
        obj = SpaceObject.create(names=(name,))
        obj.coords = pos
        count += 1
        if count % 1000 == 0:
            gc.collect()
            print count
    gc.enable()
    conn.close()
def main():
    gc.disable()
    app = QtGui.QApplication(sys.argv)
    dialog = MyDialog()
    dialog.show()
    retval = app.exec_()
    sys.exit(retval)
def wrapper(*args, **kwargs):
    try:
        gc.disable()
        return fun(*args, **kwargs)
    finally:
        gc.enable()
        gc.collect()
def newfunc(*args, **kargs):
    try:
        gc.disable()
        result = func(*args, **kargs)
    finally:
        gc.enable()
    return result
#!/usr/bin/env python

######################################################
#
# howdoi - instant coding answers via the command line
# written by Benjamin Gleitzman ([email protected])
# inspired by Rich Jones ([email protected])
#
######################################################

import gc
gc.disable()  # disable right at the start, we don't need it

import argparse
import glob
import os
import random
import re
import requests
import requests_cache
import sys

from . import __version__

from pygments import highlight
from pygments.lexers import guess_lexer, get_lexer_by_name
from pygments.formatters.terminal import TerminalFormatter
from pygments.util import ClassNotFound

from pyquery import PyQuery as pq
from requests.exceptions import ConnectionError
from requests.exceptions import SSLError
import clr
clr.AddReference('MinadNet')

from System import AppDomain
AppDomain.CurrentDomain.SetData("APP_CONFIG_FILE", "./app.config")

import time
import gc

from System import Console
from System.Net.Sockets import *
from System.Net import *

from MinadNet import *
from MinadNet.Pools import BufferPool

gc.disable()

MainBufferPool = BufferPool(1024)


class EchoServer(Listener):
    def OnConnection(self, sock):
        #print("New connection from %s" % sock.RemoteEndPoint.ToString())
        newClient = EchoClient(sock, False)
        newClient.setReadBuf()
        newClient.setName("CLIENT-SERVER")
        newClient.Receive(newClient.ReadBuf)


class EchoClient(Client):
    def setName(self, name):
        self.name = name

    def setReadBuf(self):
def main(cmd_args): import optparse global options, PSYCO usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc oparser = optparse.OptionParser(usage) oparser.add_option( "-l", "--logfilename", default="", help="contains error messages") oparser.add_option( "-v", "--verbosity", type="int", default=0, help="level of information and diagnostics provided") oparser.add_option( "-m", "--mmap", type="int", default=-1, help="1: use mmap; 0: don't use mmap; -1: accept heuristic") oparser.add_option( "-e", "--encoding", default="", help="encoding override") oparser.add_option( "-f", "--formatting", type="int", default=0, help="0 (default): no fmt info\n" "1: fmt info (all cells)\n" ) oparser.add_option( "-g", "--gc", type="int", default=0, help="0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc") oparser.add_option( "-s", "--onesheet", default="", help="restrict output to this sheet (name or index)") oparser.add_option( "-u", "--unnumbered", action="store_true", default=0, help="omit line numbers or offsets in biff_dump") oparser.add_option( "-d", "--on-demand", action="store_true", default=0, help="load sheets on demand instead of all at once") oparser.add_option( "-t", "--suppress-timing", action="store_true", default=0, help="don't print timings (diffs are less messy)") oparser.add_option( "-r", "--ragged-rows", action="store_true", default=0, help="open_workbook(..., ragged_rows=True)") options, args = oparser.parse_args(cmd_args) if len(args) == 1 and args[0] in ("version", ): pass elif len(args) < 2: oparser.error("Expected at least 2 args, found %d" % len(args)) cmd = args[0] xlrd_version = getattr(xlrd, "__VERSION__", "unknown; before 0.5") if cmd == 'biff_dump': xlrd.dump(args[1], unnumbered=options.unnumbered) sys.exit(0) if cmd == 'biff_count': xlrd.count_records(args[1]) sys.exit(0) if cmd == 'version': print("xlrd: %s, from %s" % (xlrd_version, xlrd.__file__)) print("Python:", sys.version) sys.exit(0) if options.logfilename: logfile = LogHandler(open(options.logfilename, 'w')) else: logfile = sys.stdout mmap_opt = options.mmap mmap_arg = xlrd.USE_MMAP if mmap_opt in (1, 0): mmap_arg = mmap_opt elif mmap_opt != -1: print('Unexpected value (%r) for mmap option -- assuming default' % mmap_opt) fmt_opt = options.formatting | (cmd in ('xfc', )) gc_mode = options.gc if gc_mode: gc.disable() for pattern in args[1:]: for fname in glob.glob(pattern): print("\n=== File: %s ===" % fname) if logfile != sys.stdout: logfile.setfileheading("\n=== File: %s ===\n" % fname) if gc_mode == 1: n_unreachable = gc.collect() if n_unreachable: print("GC before open:", n_unreachable, "unreachable objects") if PSYCO: import psyco psyco.full() PSYCO = 0 try: t0 = time.time() bk = xlrd.open_workbook(fname, verbosity=options.verbosity, logfile=logfile, use_mmap=mmap_arg, encoding_override=options.encoding, formatting_info=fmt_opt, on_demand=options.on_demand, ragged_rows=options.ragged_rows, ) t1 = time.time() if not options.suppress_timing: print("Open took %.2f seconds" % (t1-t0,)) except xlrd.XLRDError as e: print("*** Open failed: %s: %s" % (type(e).__name__, e)) continue except KeyboardInterrupt: print("*** KeyboardInterrupt ***") traceback.print_exc(file=sys.stdout) sys.exit(1) except BaseException as e: print("*** Open failed: %s: %s" % (type(e).__name__, e)) traceback.print_exc(file=sys.stdout) continue t0 = time.time() if cmd == 'hdr': bk_header(bk) elif cmd == 'ov': # OverView show(bk, 0) elif cmd == 'show': # all rows show(bk) elif cmd == '2rows': # first 
row and last row show(bk, 2) elif cmd == '3rows': # first row, 2nd row and last row show(bk, 3) elif cmd == 'bench': show(bk, printit=0) elif cmd == 'fonts': bk_header(bk) show_fonts(bk) elif cmd == 'names': # named reference list show_names(bk) elif cmd == 'name_dump': # named reference list show_names(bk, dump=1) elif cmd == 'labels': show_labels(bk) elif cmd == 'xfc': count_xfs(bk) else: print("*** Unknown command <%s>" % cmd) sys.exit(1) del bk if gc_mode == 1: n_unreachable = gc.collect() if n_unreachable: print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects") if not options.suppress_timing: t1 = time.time() print("\ncommand took %.2f seconds\n" % (t1-t0,)) return None
def setUp(self):
    self.using_gc = gc.isenabled()
    gc.disable()
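# The setUp() above records the prior GC state in self.using_gc, but the matching
# teardown is not shown here. A minimal sketch of what it would presumably look like,
# assuming a standard unittest.TestCase and the module-level `import gc` used by setUp():
def tearDown(self):
    if self.using_gc:
        gc.enable()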
def _execute_child(self, args, executable, preexec_fn, cwd, env,
                   universal_newlines, shell,
                   p2cread, p2cwrite,
                   c2pread, c2pwrite,
                   errread, errwrite):
    """Execute program (POSIX version)"""

    if isinstance(args, basestring):
        args = [args]
    else:
        args = list(args)

    if shell:
        args = ["/bin/sh", "-c"] + args

    if executable is None:
        executable = args[0]

    gc_was_enabled = gc.isenabled()
    # Disable gc to avoid bug where gc -> file_dealloc ->
    # write to stderr -> hang.  http://bugs.python.org/issue1336
    gc.disable()
    try:
        self.pid = os.fork()
    except:
        if gc_was_enabled:
            gc.enable()
        raise
    self._child_created = True
    if self.pid == 0:
        # Child
        try:
            # Close parent's pipe ends
            if p2cwrite is not None:
                os.close(p2cwrite)
            if c2pread is not None:
                os.close(c2pread)
            if errread is not None:
                os.close(errread)

            # Dup fds for child
            if p2cread is not None:
                os.dup2(p2cread, 0)
            if c2pwrite is not None:
                os.dup2(c2pwrite, 1)
            if errwrite is not None:
                os.dup2(errwrite, 2)

            # Close pipe fds.  Make sure we don't close the same
            # fd more than once, or standard fds.
            if p2cread is not None and p2cread not in (0,):
                os.close(p2cread)
            if c2pwrite is not None and c2pwrite not in (p2cread, 1):
                os.close(c2pwrite)
            if errwrite is not None and errwrite not in (p2cread, c2pwrite, 2):
                os.close(errwrite)

            if cwd is not None:
                os.chdir(cwd)

            if preexec_fn:
                preexec_fn()

            if env is None:
                os.execvp(executable, args)
            else:
                os.execvpe(executable, args, env)
        except:
            # Child execution failure
            os._exit(255)

    # Parent
    if gc_was_enabled:
        gc.enable()
    if p2cread is not None and p2cwrite is not None:
        os.close(p2cread)
    if c2pwrite is not None and c2pread is not None:
        os.close(c2pwrite)
    if errwrite is not None and errread is not None:
        os.close(errwrite)
def main(self): rounds = self.values['-n'] reportfile = self.values['-f'] show_bench = self.values['-s'] compare_to = self.values['-c'] hidenoise = self.values['-d'] warp = int(self.values['-w']) withgc = self.values['--with-gc'] limitnames = self.values['-t'] if limitnames: if _debug: print('* limiting test names to one with substring "%s"' % \ limitnames) limitnames = re.compile(limitnames, re.I) else: limitnames = None verbose = self.verbose withsyscheck = self.values['--with-syscheck'] calibration_runs = self.values['-C'] timer = self.values['--timer'] print('-' * LINE) print('PYBENCH %s' % __version__) print('-' * LINE) print('* using %s %s' % ( getattr(platform, 'python_implementation', lambda:'Python')(), ' '.join(sys.version.split()))) # Switch off garbage collection if not withgc: try: import gc except ImportError: print('* Python version doesn\'t support garbage collection') else: try: gc.disable() except NotImplementedError: print('* Python version doesn\'t support gc.disable') else: print('* disabled garbage collection') # "Disable" sys check interval if not withsyscheck: # Too bad the check interval uses an int instead of a long... value = 2147483647 try: sys.setcheckinterval(value) except (AttributeError, NotImplementedError): print('* Python version doesn\'t support sys.setcheckinterval') else: print('* system check interval set to maximum: %s' % value) if timer == TIMER_SYSTIMES_PROCESSTIME: import systimes print('* using timer: systimes.processtime (%s)' % \ systimes.SYSTIMES_IMPLEMENTATION) else: # Check that the clock function does exist try: get_timer(timer) except TypeError: print("* Error: Unknown timer: %s" % timer) return print('* using timer: %s' % timer) if hasattr(time, 'get_clock_info'): info = time.get_clock_info(timer[5:]) print('* timer: resolution=%s, implementation=%s' % (info.resolution, info.implementation)) print() if compare_to: try: f = open(compare_to,'rb') bench = pickle.load(f) bench.name = compare_to f.close() compare_to = bench except IOError as reason: print('* Error opening/reading file %s: %s' % ( repr(compare_to), reason)) compare_to = None if show_bench: try: f = open(show_bench,'rb') bench = pickle.load(f) bench.name = show_bench f.close() bench.print_header() if compare_to: bench.print_comparison(compare_to, hidenoise=hidenoise, limitnames=limitnames) else: bench.print_benchmark(hidenoise=hidenoise, limitnames=limitnames) except IOError as reason: print('* Error opening/reading file %s: %s' % ( repr(show_bench), reason)) print() return if reportfile: print('Creating benchmark: %s (rounds=%i, warp=%i)' % \ (reportfile, rounds, warp)) print() # Create benchmark object bench = Benchmark(reportfile, verbose=verbose, timer=timer, warp=warp, calibration_runs=calibration_runs) bench.rounds = rounds bench.load_tests(Setup, limitnames=limitnames) try: bench.calibrate() bench.run() except KeyboardInterrupt: print() print('*** KeyboardInterrupt -- Aborting') print() return bench.print_header() if compare_to: bench.print_comparison(compare_to, hidenoise=hidenoise, limitnames=limitnames) else: bench.print_benchmark(hidenoise=hidenoise, limitnames=limitnames) # Ring bell sys.stderr.write('\007') if reportfile: try: f = open(reportfile,'wb') bench.name = reportfile pickle.dump(bench,f) f.close() except IOError as reason: print('* Error opening/writing reportfile') except IOError as reason: print('* Error opening/writing reportfile %s: %s' % ( reportfile, reason)) print()
def config_rt_process(core, priority):
    gc.disable()
    set_realtime_priority(priority)
    set_core_affinity(core)
def main(argv): global BLEU_THRESHOLD_REACHED, START_TIME if len(argv) > 1: raise app.UsageError('Too many command-line arguments.') init_mllogger() mllogger.event('cache_clear') mllogger.start('init_start') mllogger.event('submission_org', 'Google') mllogger.event('submission_platform', 'TPUv3-{}'.format(jax.device_count())) mllogger.event('submission_division', 'closed') mllogger.event('submission_status', 'research') mllogger.event('submission_benchmark', 'transformer') mllogger.event('train_samples', input_pipeline.N_TRAIN) mllogger.event('eval_samples', input_pipeline.N_EVAL) tf.enable_v2_behavior() # Use hardware RNG for bernoulli randoms in dropout mask creation. if FLAGS.hardware_rng: models.set_hardware_bernoulli() num_partitions = FLAGS.num_partitions batch_size = FLAGS.batch_size if batch_size is None: batch_size = min(16 * jax.device_count() // num_partitions, 2048) mllogger.event('global_batch_size', batch_size) num_eval_steps = FLAGS.num_eval_steps max_target_length = FLAGS.max_target_length max_eval_target_length = FLAGS.max_eval_target_length max_length = max(max_target_length, max_eval_target_length) mllogger.event('max_sequence_length', max_length, metadata={'method': 'discard'}) if FLAGS.random_seed is not None: seed = FLAGS.random_seed else: seed = np.uint32(time.time() if jax.host_id() == 0 else 0) seed = per_host_sum_pmap(seed) mllogger.event('seed', int(seed)) steps_per_epoch = int(math.ceil(input_pipeline.N_TRAIN / batch_size)) logging.info('steps per epoch: %d', steps_per_epoch) num_replicas = jax.local_device_count() // num_partitions device_train_input_shape = (batch_size // (num_replicas * jax.host_count()), max_target_length) # This is per-host; in principle 64/replica or more should fit eval_batch_size = min( 32 * num_replicas, int( math.ceil(input_pipeline.N_EVAL / (num_replicas * jax.host_count()))) * num_replicas) logging.info('eval batch size: %d', eval_batch_size) pred_batches = int( math.ceil(input_pipeline.N_EVAL / (jax.host_count() * eval_batch_size))) logging.info('pred batches: %d', pred_batches) broadcast = functools.partial(_broadcast, num_replicas=num_replicas, num_partitions=num_partitions) if jax.host_id() == 0: train_summary_writer = tensorboard.SummaryWriter( os.path.join(FLAGS.model_dir, 'train')) eval_summary_writer = tensorboard.SummaryWriter( os.path.join(FLAGS.model_dir, 'eval')) else: train_summary_writer = None eval_summary_writer = None # Write summaries in background thread to avoid blocking on device sync summary_thread = thread.ThreadPoolExecutor(1, 'summary') if FLAGS.infeed: # Infeed is currently synchronous, so do it in a background thread too infeed_pool = thread.ThreadPoolExecutor(jax.local_device_count(), 'infeed') # MLPerf 2020 WMT en-de dataset uses a custom T2T dataset: # Shared 32K subword tokenization # 256-length packed training examples from WMT17 # 97-length unpacked evaluation examples from WMT14 train_keys = [ 'inputs', 'targets', 'inputs_position', 'targets_position', 'inputs_segmentation', 'targets_segmentation' ] encoder = mlperf_encoder.SubwordTextEncoder(filename=FLAGS.vocab_path) input_encoder = encoder target_encoder = encoder vocab_size = input_encoder.vocab_size output_vocab_size = target_encoder.vocab_size input_shape = (batch_size, max_target_length) target_shape = (batch_size, max_target_length) transformer_kwargs = flax.core.FrozenDict({ 'vocab_size': vocab_size, 'output_vocab_size': output_vocab_size, 'emb_dim': 1024, 'num_heads': 16, 'num_layers': 6, 'qkv_dim': 1024, 'mlp_dim': 4096, 'max_len': 
max_length, 'share_embeddings': FLAGS.share_embeddings, 'logits_via_embedding': FLAGS.logits_via_embedding, 'num_partitions': num_partitions, }) rng = random.PRNGKey(seed) rng, init_rng = random.split(rng) logging.info('initializing model') model, cache_def = create_model(init_rng, tuple(input_shape), tuple(target_shape), transformer_kwargs) mllogger.event('opt_name', 'adam') if batch_size < 1024: learning_rate = 4.0 # 0.0625 warmup_steps = 1000 beta1 = 0.9 beta2 = 0.98 if batch_size < 2048: learning_rate = 2.0 warmup_steps = 500 # ?? beta1 = 0.9 # ?? beta2 = 0.98 # ?? else: learning_rate = 3.3092157691415953 warmup_steps = 664 beta1 = 0.9086575725261137 beta2 = 0.9198719118104947 epsilon = 1e-9 if FLAGS.learning_rate is not None: learning_rate = FLAGS.learning_rate mllogger.event('opt_adam_beta_1', beta1) mllogger.event('opt_adam_beta_2', beta2) mllogger.event('opt_adam_epsilon', epsilon) logging.info('initializing optimizer') optimizer_def = optim.Adam(learning_rate, beta1=beta1, beta2=beta2, eps=epsilon, weight_decay=FLAGS.weight_decay) optimizer = optimizer_def.create(model) del model # don't keep a copy of the initial model # Build parameter partition annotations for preserving partitions from train # to eval. partition_rules = [ (('encoder', 'posembed_input'), partitions.empty_dict), (('decoder', 'posembed_targets'), partitions.empty_dict), (('embedding', ), partitions.spec(num_partitions, 1)), ((r'LayerNorm_\d+', '(bias|scale)'), None), ((r'encoder(decoder)?_norm', '(bias|scale)'), None), ((r'MultiHeadDotProductAttention_\d+', '(query|key|value)', 'kernel'), partitions.spec(1, num_partitions, 1)), ((r'MultiHeadDotProductAttention_\d+', 'out', 'kernel'), partitions.spec(num_partitions, 1, 1)), ((r'MlpBlock_\d+', r'Dense_\d+', 'bias'), None), ((r'MlpBlock_\d+', 'Dense_0', 'kernel'), partitions.spec(1, num_partitions)), ((r'MlpBlock_\d+', 'Dense_1', 'kernel'), partitions.spec(num_partitions, 1)), (('state', 'step'), None), ] optimizer_partitions = optimizer.restore_state( partitions.set_partitions(partition_rules, optimizer.state_dict())) optimizer = broadcast(optimizer) empty_metrics = broadcast({'loss': 0.0, 'accuracy': 0, 'denominator': 0}) learning_rate_fn = create_learning_rate_scheduler( base_learning_rate=learning_rate, warmup_steps=warmup_steps, hidden_size=transformer_kwargs['qkv_dim']) p_train_step = jax.pmap(functools.partial( train_step, learning_rate_fn=learning_rate_fn), axis_name='batch', in_axes=(None, 0, 0, 0)) if num_partitions > 1: sharded_predict_step = sharded_jit( predict_step, in_parts=(None, optimizer_partitions.target, None), out_parts=None) else: sharded_predict_step = predict_step if FLAGS.extra_eval_metrics: p_eval_step = jax.pmap(eval_step, axis_name='batch', in_axes=(None, 0)) p_pred_step = jax.pmap(sharded_predict_step, axis_name='batch', in_axes=(0, None, None)) p_allreduce_metrics = jax.pmap(functools.partial(lax.psum, axis_name='batch'), axis_name='batch') def device_train_loop_cond(args): _, _, _, _, step, epoch = args return step // steps_per_epoch == epoch def device_train_loop_body(args): optimizer, dropout_rngs, metrics, token, step, epoch = args input_data, token = lax.infeed(token, shape=tuple([ jax.ShapedArray( device_train_input_shape, jnp.int32) for _ in train_keys ])) batch = {k: v for k, v in zip(train_keys, input_data)} optimizer, metrics, dropout_rngs = train_step(optimizer, batch, metrics, learning_rate_fn, dropout_rng=dropout_rngs) step += 1 return optimizer, dropout_rngs, metrics, token, step, epoch def device_train_loop(optimizer, 
dropout_rngs, metrics, step, epoch): token = lax.create_token(step) optimizer, dropout_rngs, metrics, _, step, _ = lax.while_loop( device_train_loop_cond, device_train_loop_body, (optimizer, dropout_rngs, metrics, token, step, epoch)) return optimizer, dropout_rngs, metrics, step if num_partitions > 1: device_train_loop = sharded_jit(device_train_loop, in_parts=(optimizer_partitions, None, None, None, None), out_parts=(optimizer_partitions, None, None, None)) p_train_epoch = jax.pmap(device_train_loop, axis_name='batch', in_axes=(None, 0, 0, None, None)) p_allreduce_metrics_train = functools.partial(lax.psum, axis_name='batch') if num_partitions > 1: p_allreduce_metrics_train = sharded_jit(p_allreduce_metrics_train, in_parts=None, out_parts=None, num_partitions=num_partitions) p_allreduce_metrics_train = jax.pmap(p_allreduce_metrics_train, axis_name='batch') # Precompile all needed computations with fake data so as not to include # compilation time in MLPerf metrics. if FLAGS.precompile: logging.info('precompiling step/epoch functions') if FLAGS.infeed: # the device training loop condition will immediately be false, but # the optimizer tree will be resharded here optimizer, *_ = p_train_epoch(unbroadcast(optimizer), random.split(rng, num_replicas), empty_metrics, jnp.array(0, dtype=jnp.int32), 1) else: metrics = empty_metrics train_input_shape = (num_replicas, batch_size // num_replicas, input_pipeline.MAX_TRAIN_LEN) fake_batch = { k: jnp.ones(train_input_shape, jnp.int32) for k in train_keys } p_train_step(unbroadcast(optimizer), fake_batch, metrics, dropout_rng=random.split(rng, num_replicas)) eval_input_shape = (num_replicas, eval_batch_size // num_replicas, input_pipeline.MAX_EVAL_LEN) fake_eval_batch = { 'inputs': jnp.ones(eval_input_shape, jnp.int32), 'targets': jnp.ones(eval_input_shape, jnp.int32), } if FLAGS.extra_eval_metrics: p_eval_step(unbroadcast(optimizer.target), fake_eval_batch) fake_cache = cache_def.initialize_cache( (eval_input_shape[1], FLAGS.max_predict_length)) p_pred_step(fake_eval_batch['inputs'], unbroadcast(optimizer.target), fake_cache) time.sleep(20) sync_devices() fake_bleu_1 = np.zeros((4, ), dtype=np.int32) fake_bleu_2 = np.zeros((), dtype=np.int32) per_host_sum_pmap((fake_bleu_1, fake_bleu_1, fake_bleu_2, fake_bleu_2)) sync_devices() p_allreduce_metrics_train(empty_metrics) sync_devices() logging.info('finished precompiling step/epoch functions') # We init the first set of dropout PRNG keys, but update it afterwards inside # the main pmap'd training update for performance. dropout_rngs = random.split(rng, num_replicas) # Record time-0 metrics for proper tensorboard plot x-axis scaling. 
if jax.host_id() == 0: if FLAGS.compute_train_metrics: train_summary_writer.scalar('loss', 9.999, 0) train_summary_writer.scalar('accuracy', 0.0, 0) train_summary_writer.flush() eval_summary_writer.scalar('bleu', 0.0, 0) eval_summary_writer.flush() train_ds = input_pipeline.get_wmt_dataset(batch_size=batch_size // jax.host_count(), train=True) eval_ds = input_pipeline.get_wmt_dataset(batch_size=eval_batch_size, train=False) train_iter = iter(train_ds) eval_iter = iter(eval_ds) local_devices = jax.local_devices() host_step, device_step = 0, broadcast(0) gc.disable() mllogger.end('init_stop') if jax.host_id() == 0: mllogger.start('run_start') START_TIME = time.time() for epoch in range(FLAGS.num_epochs): if jax.host_id() == 0 and not BLEU_THRESHOLD_REACHED: mllogger.start('block_start', metadata={ 'first_epoch_num': epoch + 1, 'epoch_count': 1 }) metrics = empty_metrics if FLAGS.infeed: optimizer, dropout_rngs, metrics, device_step = p_train_epoch( unbroadcast(optimizer), dropout_rngs, metrics, unbroadcast(device_step), epoch) while int(host_step // steps_per_epoch) == epoch: # pylint: disable=protected-access batch = jax.tree_map(lambda x: x._numpy(), next(train_iter)) # Shard data to devices and do a training step. batch = jax.tree_map( lambda x: x.reshape((num_replicas, -1) + x.shape[1:]), batch) if FLAGS.infeed: for i, device in enumerate(local_devices): replica_id = i // num_partitions input_tuple = tuple( [batch[k][replica_id] for k in train_keys]) assert input_tuple[0].shape == device_train_input_shape, ( 'infeed shape error %s != %s' % (input_tuple[0].shape, device_train_input_shape)) assert input_tuple[0].dtype == jnp.int32, ( 'infeed dtype error %s != %s' % (input_tuple[0].dtype, jnp.int32)) infeed_pool.submit( functools.partial(device.transfer_to_infeed, input_tuple)) else: optimizer, metrics, dropout_rngs = p_train_step( unbroadcast(optimizer), batch, metrics, dropout_rng=dropout_rngs) host_step += 1 if FLAGS.compute_train_metrics: metrics = p_allreduce_metrics_train(metrics) # Schedule training metric handling. summary_thread.submit( functools.partial(write_train_summary, metrics, train_summary_writer, host_step)) # Optional, extra evaluation metrics. if FLAGS.extra_eval_metrics: eval_metrics = [] eval_iter = iter(eval_ds) for _, eval_batch in zip(range(num_eval_steps), eval_iter): eval_batch = common_utils.shard(eval_batch) metrics = p_eval_step(unbroadcast(optimizer.target), eval_batch) eval_metrics.append(metrics) eval_metrics = p_allreduce_metrics(eval_metrics) # Schedule metric summarization/logging. summary_thread.submit( functools.partial(write_eval_summary, eval_metrics, eval_summary_writer, host_step)) # Translation and BLEU Score. all_predicted, all_targets, all_bs = [], [], [] for i in range(pred_batches): # pylint: disable=protected-access pred_batch = jax.tree_map(lambda x: x._numpy(), next(eval_iter)) # Handle final odd-sized batch by padding instead of dropping it. 
cur_pred_batch_size = pred_batch['inputs'].shape[0] if cur_pred_batch_size != eval_batch_size: pred_batch = jax.tree_map( lambda x: pad_examples(x, eval_batch_size), pred_batch) pred_batch = jax.tree_map( lambda x: x.reshape((num_replicas, -1) + x.shape[1:]), pred_batch) per_device_batchsize = pred_batch['inputs'].shape[1] cache = cache_def.initialize_cache( (per_device_batchsize, FLAGS.max_predict_length)) all_predicted.append( p_pred_step(pred_batch['inputs'], unbroadcast(optimizer.target), cache)) all_targets.append(pred_batch['targets']) all_bs.append(cur_pred_batch_size) # Schedule BLEU calculation and summarization/logging. # We use the ICI as part of BLEU score computation, so we call this from the # main thread so the BLEU pmap runs before the next train epoch pmap write_predict_summary(all_predicted, all_targets, all_bs, target_encoder, eval_summary_writer, epoch, host_step, summary_thread) # Wait until computations are done before exiting sync_devices() if jax.host_id() == 0: summary_thread.shutdown() if not BLEU_THRESHOLD_REACHED: mllogger.end('run_stop', metadata={'status': 'aborted'})
def map2dict(a_file, column, limit_counts_reads=7 * (10**7), size_buffer=10**8):
    # get read name and mismatches
    fi = open(a_file, 'r')
    pack = None
    last_read = None
    base = []
    while True:
        gc.disable()
        du = fi.readlines(size_buffer)
        gc.enable()
        if not du:
            break
        gc.disable()
        du = [giveRC(d.rstrip('\r\n').split('\t'), column) for d in du]
        gc.enable()
        di = []
        for d in du:
            if last_read != d[0]:
                gc.disable()
                di.append((d[0], 0 if not d[1] else d[1].count(':')))
                gc.enable()
                last_read = d[0]
        if di:
            gc.disable()
            base.extend(di)
            gc.enable()
            di = []
        if len(base) > limit_counts_reads:
            gc.disable()
            base = dict(base)
            gc.enable()
            yield base
            base = []
    if base:
        gc.disable()
        base = dict(base)
        gc.enable()
        yield base
        base = []
    fi.close()
def __enter__(self):
    self.gcold = gc.isenabled()
    if self.disable_gc:
        gc.disable()
    self.start_time = timeit.default_timer()
    return self
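# __enter__ above stores the previous GC state and the start time; the corresponding
# __exit__ is not shown. A minimal sketch under the assumption that the object exposes
# the elapsed time as self.elapsed (the attribute name is illustrative) and that the
# module already imports gc and timeit:
def __exit__(self, exc_type, exc_value, traceback):
    self.elapsed = timeit.default_timer() - self.start_time
    if self.disable_gc and self.gcold:
        gc.enable()
    return False  # do not suppress exceptions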
def disableGarbageCollector():
    gc.disable()
    print('Disabled Garbage Collector.')
def _fork(self, path, uid, gid, executable, args, environment, **kwargs): """ Fork and then exec sub-process. @param path: the path where to run the new process. @type path: C{str} @param uid: if defined, the uid used to run the new process. @type uid: C{int} @param gid: if defined, the gid used to run the new process. @type gid: C{int} @param executable: the executable to run in a new process. @type executable: C{str} @param args: arguments used to create the new process. @type args: C{list}. @param environment: environment used for the new process. @type environment: C{dict}. @param kwargs: keyword arguments to L{_setupChild} method. """ settingUID = (uid is not None) or (gid is not None) if settingUID: curegid = os.getegid() currgid = os.getgid() cureuid = os.geteuid() curruid = os.getuid() if uid is None: uid = cureuid if gid is None: gid = curegid # prepare to change UID in subprocess os.setuid(0) os.setgid(0) collectorEnabled = gc.isenabled() gc.disable() try: self.pid = os.fork() except: # Still in the parent process if settingUID: os.setregid(currgid, curegid) os.setreuid(curruid, cureuid) if collectorEnabled: gc.enable() raise else: if self.pid == 0: # pid is 0 in the child process # do not put *ANY* code outside the try block. The child process # must either exec or _exit. If it gets outside this block (due # to an exception that is not handled here, but which might be # handled higher up), there will be two copies of the parent # running in parallel, doing all kinds of damage. # After each change to this code, review it to make sure there # are no exit paths. try: # Stop debugging. If I am, I don't care anymore. sys.settrace(None) self._setupChild(**kwargs) self._execChild(path, settingUID, uid, gid, executable, args, environment) except: # If there are errors, bail and try to write something # descriptive to stderr. # XXX: The parent's stderr isn't necessarily fd 2 anymore, or # even still available # XXXX: however even libc assumes write(2, err) is a useful # thing to attempt try: stderr = os.fdopen(2, 'w') stderr.write( "Upon execvpe %s %s in environment %s\n:" % (executable, str(args), "id %s" % id(environment))) traceback.print_exc(file=stderr) stderr.flush() for fd in range(3): os.close(fd) except: pass # make *sure* the child terminates # Did you read the comment about not adding code here? os._exit(1) # we are now in parent process if settingUID: os.setregid(currgid, curegid) os.setreuid(curruid, cureuid) if collectorEnabled: gc.enable() self.status = -1 # this records the exit status of the child
def opt_disablegc(self):
    """Disable the garbage collector"""
    gc.disable()
def disableGC(self) -> None:
    gc.collect()
    gc.disable()
def main(sm=None, pm=None): gc.disable() set_realtime_priority(5) if sm is None: sm = messaging.SubMaster(['liveLocationKalman', 'carState'], poll=['liveLocationKalman']) if pm is None: pm = messaging.PubMaster(['liveParameters']) params_reader = Params() # wait for stats about the car to come in from controls cloudlog.info("paramsd is waiting for CarParams") CP = car.CarParams.from_bytes(params_reader.get("CarParams", block=True)) cloudlog.info("paramsd got CarParams") min_sr, max_sr = 0.5 * CP.steerRatio, 2.0 * CP.steerRatio params = params_reader.get("LiveParameters") if not params_reader.get_bool( 'dp_reset_live_param_on_start') else None # Check if car model matches if params is not None: params = json.loads(params) if params.get('carFingerprint', None) != CP.carFingerprint: cloudlog.info("Parameter learner found parameters for wrong car.") params = None # Check if starting values are sane if params is not None: try: angle_offset_sane = abs(params.get('angleOffsetAverageDeg')) < 10.0 steer_ratio_sane = min_sr <= params['steerRatio'] <= max_sr params_sane = angle_offset_sane and steer_ratio_sane if not params_sane: cloudlog.info(f"Invalid starting values found {params}") params = None except Exception as e: cloudlog.info(f"Error reading params {params}: {str(e)}") params = None # TODO: cache the params with the capnp struct if params is None: params = { 'carFingerprint': CP.carFingerprint, 'steerRatio': CP.steerRatio, 'stiffnessFactor': 1.0, 'angleOffsetAverageDeg': 0.0, } cloudlog.info("Parameter learner resetting to default values") # When driving in wet conditions the stiffness can go down, and then be too low on the next drive # Without a way to detect this we have to reset the stiffness every drive params['stiffnessFactor'] = 1.0 learner = ParamsLearner(CP, params['steerRatio'], params['stiffnessFactor'], math.radians(params['angleOffsetAverageDeg'])) angle_offset_average = params['angleOffsetAverageDeg'] angle_offset = angle_offset_average while True: sm.update() for which in sorted(sm.updated.keys(), key=lambda x: sm.logMonoTime[x]): if sm.updated[which]: t = sm.logMonoTime[which] * 1e-9 learner.handle_log(t, which, sm[which]) if sm.updated['liveLocationKalman']: x = learner.kf.x P = np.sqrt(learner.kf.P.diagonal()) if not all(map(math.isfinite, x)): cloudlog.error( "NaN in liveParameters estimate. 
Resetting to default values" ) learner = ParamsLearner(CP, CP.steerRatio, 1.0, 0.0) x = learner.kf.x angle_offset_average = clip( math.degrees(x[States.ANGLE_OFFSET]), angle_offset_average - MAX_ANGLE_OFFSET_DELTA, angle_offset_average + MAX_ANGLE_OFFSET_DELTA) angle_offset = clip( math.degrees(x[States.ANGLE_OFFSET] + x[States.ANGLE_OFFSET_FAST]), angle_offset - MAX_ANGLE_OFFSET_DELTA, angle_offset + MAX_ANGLE_OFFSET_DELTA) msg = messaging.new_message('liveParameters') msg.logMonoTime = sm.logMonoTime['carState'] msg.liveParameters.posenetValid = True msg.liveParameters.sensorValid = True msg.liveParameters.steerRatio = float(x[States.STEER_RATIO]) msg.liveParameters.stiffnessFactor = float(x[States.STIFFNESS]) msg.liveParameters.angleOffsetAverageDeg = angle_offset_average msg.liveParameters.angleOffsetDeg = angle_offset msg.liveParameters.valid = all(( abs(msg.liveParameters.angleOffsetAverageDeg) < 10.0, abs(msg.liveParameters.angleOffsetDeg) < 10.0, 0.2 <= msg.liveParameters.stiffnessFactor <= 5.0, min_sr <= msg.liveParameters.steerRatio <= max_sr, )) msg.liveParameters.steerRatioStd = float(P[States.STEER_RATIO]) msg.liveParameters.stiffnessFactorStd = float(P[States.STIFFNESS]) msg.liveParameters.angleOffsetAverageStd = float( P[States.ANGLE_OFFSET]) msg.liveParameters.angleOffsetFastStd = float( P[States.ANGLE_OFFSET_FAST]) if sm.frame % 1200 == 0: # once a minute params = { 'carFingerprint': CP.carFingerprint, 'steerRatio': msg.liveParameters.steerRatio, 'stiffnessFactor': msg.liveParameters.stiffnessFactor, 'angleOffsetAverageDeg': msg.liveParameters.angleOffsetAverageDeg, } put_nonblocking("LiveParameters", json.dumps(params)) pm.send('liveParameters', msg)
def _teardown():
    gc.disable()  # do not interfere with timing tests
    yield None
    gc.enable()
    gc.collect()
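# The generator above reads like a yield-style fixture: everything before the yield runs
# as setup, everything after it as teardown. A minimal sketch of how it might be wired up
# as a pytest autouse fixture (the pytest wiring is an assumption, not taken from the
# snippet itself):
import gc
import pytest

@pytest.fixture(autouse=True)
def _no_gc_during_test():
    gc.disable()  # do not interfere with timing tests
    yield
    gc.enable()
    gc.collect()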
def __init__(self, fname=None, fdata=None, decompress=False, disable_gc=True, slow_parsing=True): # Runs a lot faster with GC off. disable_gc = disable_gc and gc.isenabled() try: if disable_gc: gc.disable() if fname is not None: assert fdata is None # Allow reading preexisting streams like pyPdf if hasattr(fname, 'read'): fdata = fname.read() else: try: f = open(fname, 'rb') fdata = f.read() f.close() except IOError: raise PdfParseError('Could not read PDF file %s' % fname) assert fdata is not None if not fdata.startswith('%PDF-'): startloc = fdata.find('%PDF-') if startloc >= 0: log.warning('PDF header not at beginning of file') else: lines = fdata.lstrip().splitlines() if not lines: raise PdfParseError('Empty PDF file!') raise PdfParseError('Invalid PDF header: %s' % repr(lines[0])) endloc = fdata.rfind('%EOF') if endloc < 0: #log.error('EOF mark not found: %s' % # repr(fdata[-20:])) endloc = len(fdata) - 6 endloc += 6 junk = fdata[endloc:] # Done: It is not necessary to truncate the string. # Some PDFs just use wrong EOF at the end to confuse parsers. #fdata = fdata[:endloc] if junk.rstrip('\00').strip(): log.warning('Extra data at end of file') private = self.private private.indirect_objects = {} private.deferred_objects = set() private.special = {'<<': self.readdict, '[': self.readarray, 'endobj': self.empty_obj, } for tok in r'\ ( ) < > { } ] >> %'.split(): self.special[tok] = self.badtoken if slow_parsing == True: startloc = 0 source = PdfTokens(fdata, startloc, True) private.source = source # Calling next() just for complete the structure of source by adding source.current. source.next() source.all_offsets = [] source.obj_offsets = {} self.slow_parse_xref(source) # Done: add slow parsing for multiple trailers. trailer_loc = fdata.find('trailer') newdict = None while trailer_loc >= 0: source.floc = trailer_loc assert source.next() == "trailer" # trailer tok = source.next() # << if tok != '<<': source.exception('Expected "<<" starting catalog') # Ignored the corrupted trailer. try: tmpdict = self.readdict(source) except: pass else: if not newdict: newdict = tmpdict else: newdict.update(tmpdict) finally: trailer_loc = fdata.find('trailer', trailer_loc+1) if newdict is not None: newdict.Prev = None else: source.exception("No trailer.") else: startloc, source = self.findxref(fdata) private.source = source xref_table_list = [] source.all_offsets = [] while 1: source.obj_offsets = {} # Loop through all the cross-reference tables self.parsexref(source) tok = source.next() if tok != '<<': source.exception('Expected "<<" starting catalog') newdict = self.readdict(source) token = source.next() if token != 'startxref' and not xref_table_list: source.warning('Expected "startxref" at end of xref table') # Loop if any previously-written tables. 
prev = newdict.Prev if prev is None: break if not xref_table_list: newdict.Prev = None original_indirect = self.indirect_objects.copy() original_newdict = newdict source.floc = int(prev) xref_table_list.append(source.obj_offsets) self.indirect_objects.clear() if xref_table_list: for update in reversed(xref_table_list): source.obj_offsets.update(update) self.indirect_objects.clear() self.indirect_objects.update(original_indirect) newdict = original_newdict self.update(newdict) # self.read_all_indirect(source) private.pages = self.readpages(self.Root) if decompress: self.uncompress() # For compatibility with pyPdf private.numPages = len(self.pages) finally: if disable_gc: gc.enable() # load the trace fname_trace = fname + '.trace' if os.path.isfile(fname_trace): f = open(fname_trace, 'rb') private.active_trace = pickle.load(f) f.close()
# WARNING - this application disables python garbage collection, to slightly improve performance
# comment this out if you see random crashes

import RPi.GPIO as GPIO          # to allow control of GPIO
import time                      # for delay timing
from time import sleep           # for delay timing
import datetime                  # for real time
from datetime import datetime    # for real time numbers
import glob                      # needed to support automatic setup of DS18B20

# try commenting these 2 lines out if you see random crashes
import gc                        # for managing garbage collection
gc.disable()                     # disable garbage collect, minimises some random display blips
                                 # on slower Pi's

# define GPIO pin usage - BCM numbering assumed throughout
GPIO.setmode(GPIO.BCM)
# turn off warning
GPIO.setwarnings(False)

# segments
segA = 5
segB = 6
segC = 13
segD = 19
def controlsd_thread(gctx=None): gc.disable() # start the loop set_realtime_priority(3) params = Params() # Pub Sockets sendcan = messaging.pub_sock(service_list['sendcan'].port) controlsstate = messaging.pub_sock(service_list['controlsState'].port) carstate = messaging.pub_sock(service_list['carState'].port) carcontrol = messaging.pub_sock(service_list['carControl'].port) carevents = messaging.pub_sock(service_list['carEvents'].port) carparams = messaging.pub_sock(service_list['carParams'].port) is_metric = params.get("IsMetric") == "1" passive = params.get("Passive") != "0" sm = messaging.SubMaster([ 'thermal', 'health', 'liveCalibration', 'driverMonitoring', 'plan', 'pathPlan' ]) logcan = messaging.sub_sock(service_list['can'].port) # wait for health and CAN packets hw_type = messaging.recv_one(sm.sock['health']).health.hwType is_panda_black = hw_type == log.HealthData.HwType.blackPanda wait_for_can(logcan) CI, CP = get_car(logcan, sendcan, is_panda_black) logcan.close() # TODO: Use the logcan socket from above, but that will currenly break the tests can_timeout = None if os.environ.get('NO_CAN_TIMEOUT', False) else 100 can_sock = messaging.sub_sock(service_list['can'].port, timeout=can_timeout) car_recognized = CP.carName != 'mock' # If stock camera is disconnected, we loaded car controls and it's not chffrplus controller_available = CP.enableCamera and CI.CC is not None and not passive read_only = not car_recognized or not controller_available if read_only: CP.safetyModel = car.CarParams.SafetyModel.elm327 # diagnostic only # Write CarParams for radard and boardd safety mode params.put("CarParams", CP.to_bytes()) params.put("LongitudinalControl", "1" if CP.openpilotLongitudinalControl else "0") CC = car.CarControl.new_message() AM = AlertManager() startup_alert = get_startup_alert(car_recognized, controller_available) AM.add(sm.frame, startup_alert, False) LoC = LongControl(CP, CI.compute_gb) VM = VehicleModel(CP) if CP.lateralTuning.which() == 'pid': LaC = LatControlPID(CP) elif CP.lateralTuning.which() == 'indi': LaC = LatControlINDI(CP) elif CP.lateralTuning.which() == 'lqr': LaC = LatControlLQR(CP) driver_status = DriverStatus() state = State.disabled soft_disable_timer = 0 v_cruise_kph = 255 v_cruise_kph_last = 0 overtemp = False free_space = False cal_status = Calibration.INVALID cal_perc = 0 mismatch_counter = 0 low_battery = False events_prev = [] sm['pathPlan'].sensorValid = True sm['pathPlan'].posenetValid = True # detect sound card presence sounds_available = not os.path.isfile('/EON') or ( os.path.isdir('/proc/asound/card0') and open('/proc/asound/card0/state').read().strip() == 'ONLINE') # controlsd is driven by can recv, expected at 100Hz rk = Ratekeeper(100, print_delay_threshold=None) prof = Profiler(False) # off by default while True: start_time = sec_since_boot() prof.checkpoint("Ratekeeper", ignore=True) # Sample data and compute car events CS, events, cal_status, cal_perc, overtemp, free_space, low_battery, mismatch_counter =\ data_sample(CI, CC, sm, can_sock, cal_status, cal_perc, overtemp, free_space, low_battery, driver_status, state, mismatch_counter, params) prof.checkpoint("Sample") # Create alerts if not sm.all_alive_and_valid(): events.append( create_event('commIssue', [ET.NO_ENTRY, ET.SOFT_DISABLE])) if not sm['pathPlan'].mpcSolutionValid: events.append( create_event('plannerError', [ET.NO_ENTRY, ET.IMMEDIATE_DISABLE])) if not sm['pathPlan'].sensorValid: events.append( create_event('sensorDataInvalid', [ET.NO_ENTRY, ET.PERMANENT])) if not 
sm['pathPlan'].paramsValid: events.append(create_event('vehicleModelInvalid', [ET.WARNING])) if not sm['pathPlan'].posenetValid: events.append( create_event('posenetInvalid', [ET.NO_ENTRY, ET.SOFT_DISABLE])) if not sm['plan'].radarValid: events.append( create_event('radarFault', [ET.NO_ENTRY, ET.SOFT_DISABLE])) if sm['plan'].radarCanError: events.append( create_event('radarCanError', [ET.NO_ENTRY, ET.SOFT_DISABLE])) if not CS.canValid: events.append( create_event('canError', [ET.NO_ENTRY, ET.IMMEDIATE_DISABLE])) if not sounds_available: events.append( create_event('soundsUnavailable', [ET.NO_ENTRY, ET.PERMANENT])) # Only allow engagement with brake pressed when stopped behind another stopped car if CS.brakePressed and sm[ 'plan'].vTargetFuture >= STARTING_TARGET_SPEED and not CP.radarOffCan and CS.vEgo < 0.3: events.append( create_event('noTarget', [ET.NO_ENTRY, ET.IMMEDIATE_DISABLE])) if not read_only: # update control state state, soft_disable_timer, v_cruise_kph, v_cruise_kph_last = \ state_transition(sm.frame, CS, CP, state, events, soft_disable_timer, v_cruise_kph, AM) prof.checkpoint("State transition") # Compute actuators (runs PID loops and lateral MPC) actuators, v_cruise_kph, driver_status, v_acc, a_acc, lac_log = \ state_control(sm.frame, sm.rcv_frame, sm['plan'], sm['pathPlan'], CS, CP, state, events, v_cruise_kph, v_cruise_kph_last, AM, rk, driver_status, LaC, LoC, VM, read_only, is_metric, cal_perc) prof.checkpoint("State Control") # Publish data CC, events_prev = data_send(sm, CS, CI, CP, VM, state, events, actuators, v_cruise_kph, rk, carstate, carcontrol, carevents, carparams, controlsstate, sendcan, AM, driver_status, LaC, LoC, read_only, start_time, v_acc, a_acc, lac_log, events_prev) prof.checkpoint("Sent") rk.monitor_time() prof.display()
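Several of the controlsd variants in this collection share the same shape: disable automatic GC once at thread start, then run a fixed-rate loop driven by CAN traffic. A standalone, hedged sketch of that shape is below; step(), the rate, and the iteration count are stand-ins, and unlike the real daemon the sketch restores the previous GC state on the way out so it is safe to call from other code.

import gc
import time

def step():
    # stand-in for the real per-iteration work (data_sample / state_control / data_send)
    sum(range(100))

def control_loop(rate_hz=100, iterations=1000):
    """Sketch: run a fixed-rate loop with automatic GC switched off."""
    was_enabled = gc.isenabled()
    gc.disable()
    period = 1.0 / rate_hz
    try:
        for _ in range(iterations):
            t0 = time.monotonic()
            step()
            # sleep off the remainder of the period, like a Ratekeeper would
            time.sleep(max(0.0, period - (time.monotonic() - t0)))
    finally:
        if was_enabled:
            gc.enable()

control_loop()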
def main(args): global running_loss, start_t # logging info that needs to persist across iterations viz = utils.visualize.VisdomVisualize(env_name=args.env_name) viz.viz.text(str(args)) running_loss = None running_accs = { qtype: 0.5 for qtype, _ in enumerate(utils.QTYPE_ID_TO_META) } start_t = None # data dataset = FigQADataset(args.figqa_dir, args.figqa_pre, split='train1') dataloader = DataLoader(dataset, batch_size=args.batch_size, num_workers=args.workers, pin_memory=True, shuffle=bool(args.shuffle_train)) val_dataset = FigQADataset(args.figqa_dir, args.figqa_pre, split=args.val_split) val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=args.workers, pin_memory=True, shuffle=True) # model if args.start_from: model = utils.load_model(fname=args.start_from, ngpus=args.cuda) else: model_args = figqa.options.model_args(args) model = utils.load_model(model_args, ngpus=args.cuda) # optimization optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) def exp_lr(epoch): iters = epoch * len(dataloader) return args.lr_decay**iters scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, exp_lr) criterion = nn.NLLLoss() # training for epoch in range(args.epochs): checkpoint_stuff(**locals()) scheduler.step() start_t = timer() # TODO: understand when/why automatic garbage collection slows down # the train loop gc.disable() data_time = time.time() start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) end.record() for local_iter_idx, batch in batch_iter(dataloader, args): start.record() torch.cuda.synchronize() iter_idx = local_iter_idx + epoch * len(dataloader) # forward + update optimizer.zero_grad() pred = model(batch) loss = criterion(pred, batch['answer']) loss.backward() optimizer.step() #Chargrid: Gradient Clipping #torch.nn.utils.clip_grad_value_([model.module.decision_weight],0.1) # visualize, log, checkpoint #print("visualize") log_stuff(**locals()) #for idx,timing in enumerate(["FW","Loss","Backw","Opt","log"]): #print(timing,times[idx+1]-times[idx]) #data_time = time.time() end.record() torch.cuda.synchronize() #print("Training: ",start.elapsed_time(end)) gc.enable()
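The training loop above disables GC around each epoch because allocating and dropping many small containers (batches, dicts, tensor wrappers) keeps triggering generation-0 collections. A rough way to see the effect on a plain allocation-heavy loop is sketched below; the workload and the numbers it prints are illustrative only and say nothing about the actual training code.

import gc
import time

def allocate_batches(n_batches=2000, batch_size=500):
    """Allocate and drop many small container objects, as a DataLoader-style loop does."""
    for _ in range(n_batches):
        batch = [{"x": list(range(10))} for _ in range(batch_size)]
        del batch

def timed(enabled):
    if enabled:
        gc.enable()
    else:
        gc.disable()
    try:
        t0 = time.perf_counter()
        allocate_batches()
        return time.perf_counter() - t0
    finally:
        gc.enable()

print("GC enabled :", timed(True))
print("GC disabled:", timed(False))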
def controlsd_thread(gctx=None, rate=100, default_bias=0.): gc.disable() # start the loop set_realtime_priority(3) context = zmq.Context() params = Params() # Pub Sockets live100 = messaging.pub_sock(context, service_list['live100'].port) carstate = messaging.pub_sock(context, service_list['carState'].port) carcontrol = messaging.pub_sock(context, service_list['carControl'].port) livempc = messaging.pub_sock(context, service_list['liveMpc'].port) is_metric = params.get("IsMetric") == "1" passive = params.get("Passive") != "0" # No sendcan if passive if not passive: sendcan = messaging.pub_sock(context, service_list['sendcan'].port) else: sendcan = None # Sub sockets poller = zmq.Poller() thermal = messaging.sub_sock(context, service_list['thermal'].port, conflate=True, poller=poller) health = messaging.sub_sock(context, service_list['health'].port, conflate=True, poller=poller) cal = messaging.sub_sock(context, service_list['liveCalibration'].port, conflate=True, poller=poller) driver_monitor = messaging.sub_sock(context, service_list['driverMonitoring'].port, conflate=True, poller=poller) gps_location = messaging.sub_sock(context, service_list['gpsLocationExternal'].port, conflate=True, poller=poller) logcan = messaging.sub_sock(context, service_list['can'].port) CC = car.CarControl.new_message() CI, CP = get_car(logcan, sendcan, 1.0 if passive else None) if CI is None: raise Exception("unsupported car") # if stock camera is connected, then force passive behavior if not CP.enableCamera: passive = True sendcan = None if passive: CP.safetyModel = car.CarParams.SafetyModels.noOutput # Get FCW toggle from settings fcw_enabled = params.get("IsFcwEnabled") == "1" geofence = None PL = Planner(CP, fcw_enabled) LoC = LongControl(CP, CI.compute_gb) VM = VehicleModel(CP) LaC = LatControl(CP) AM = AlertManager() driver_status = DriverStatus() if not passive: AM.add("startup", False) # Write CarParams for radard and boardd safety mode params.put("CarParams", CP.to_bytes()) state = State.disabled soft_disable_timer = 0 v_cruise_kph = 255 v_cruise_kph_last = 0 overtemp = False free_space = False cal_status = Calibration.INVALID cal_perc = 0 mismatch_counter = 0 low_battery = False rk = Ratekeeper(rate, print_delay_threshold=2./1000) # Read angle offset from previous drive, fallback to default angle_offset = default_bias calibration_params = params.get("CalibrationParams") if calibration_params: try: calibration_params = json.loads(calibration_params) angle_offset = calibration_params["angle_offset2"] except (ValueError, KeyError): pass prof = Profiler(False) # off by default while True: prof.checkpoint("Ratekeeper", ignore=True) # Sample data and compute car events CS, events, cal_status, cal_perc, overtemp, free_space, low_battery, mismatch_counter = data_sample(CI, CC, thermal, cal, health, driver_monitor, gps_location, poller, cal_status, cal_perc, overtemp, free_space, low_battery, driver_status, geofence, state, mismatch_counter, params) prof.checkpoint("Sample") # Define longitudinal plan (MPC) plan, plan_ts = calc_plan(CS, CP, events, PL, LaC, LoC, v_cruise_kph, driver_status, geofence) prof.checkpoint("Plan") if not passive: # update control state state, soft_disable_timer, v_cruise_kph, v_cruise_kph_last = \ state_transition(CS, CP, state, events, soft_disable_timer, v_cruise_kph, AM) prof.checkpoint("State transition") # Compute actuators (runs PID loops and lateral MPC) actuators, v_cruise_kph, driver_status, angle_offset = state_control(plan, CS, CP, state, events, v_cruise_kph, 
v_cruise_kph_last, AM, rk, driver_status, PL, LaC, LoC, VM, angle_offset, passive, is_metric, cal_perc) prof.checkpoint("State Control") # Publish data CC = data_send(PL.perception_state, plan, plan_ts, CS, CI, CP, VM, state, events, actuators, v_cruise_kph, rk, carstate, carcontrol, live100, livempc, AM, driver_status, LaC, LoC, angle_offset, passive) prof.checkpoint("Sent") rk.keep_time() # Run at 100Hz prof.display()
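Fully disabling the collector is not the only option for a loop like this. A gentler alternative, sketched below as an assumption rather than anything the original code does, is to raise the generation-0 threshold so collections become rare but cycles are still reclaimed eventually.

import gc

# Default thresholds are roughly (700, 10, 10): a gen-0 pass runs after about
# 700 net allocations of container objects.
print(gc.get_threshold())

# Raising the gen-0 threshold makes collections much rarer without disabling
# them entirely, so cyclic garbage is still reclaimed eventually.
gc.set_threshold(100000, 50, 50)

# gc.get_count() shows how close each generation currently is to its threshold.
print(gc.get_count())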
def loads(self, msg, encoding=None, raw=False): ''' Run the correct loads serialization format :param encoding: Useful for Python 3 support. If the msgpack data was encoded using "use_bin_type=True", this will differentiate between the 'bytes' type and the 'str' type by decoding contents with 'str' type to what the encoding was set as. Recommended encoding is 'utf-8' when using Python 3. If the msgpack data was not encoded using "use_bin_type=True", it will try to decode all 'bytes' and 'str' data (the distinction has been lost in this case) to what the encoding is set as. In this case, it will fail if any of the contents cannot be converted. ''' try: def ext_type_decoder(code, data): if code == 78: data = salt.utils.stringutils.to_unicode(data) return datetime.datetime.strptime(data, '%Y%m%dT%H:%M:%S.%f') return data gc.disable() # performance optimization for msgpack loads_kwargs = {'use_list': True, 'ext_hook': ext_type_decoder} if msgpack.version >= (0, 4, 0): # msgpack only supports 'encoding' starting in 0.4.0. # Due to this, if we don't need it, don't pass it at all so # that under Python 2 we can still work with older versions # of msgpack. if msgpack.version >= (0, 5, 2): if encoding is None: loads_kwargs['raw'] = True else: loads_kwargs['raw'] = False else: loads_kwargs['encoding'] = encoding try: ret = msgpack.loads(msg, **loads_kwargs) except UnicodeDecodeError: # msg contains binary data loads_kwargs.pop('raw', None) loads_kwargs.pop('encoding', None) ret = msgpack.loads(msg, **loads_kwargs) else: ret = msgpack.loads(msg, **loads_kwargs) if six.PY3 and encoding is None and not raw: ret = salt.transport.frame.decode_embedded_strs(ret) except Exception as exc: log.critical( 'Could not deserialize msgpack message. This often happens ' 'when trying to read a file not in binary mode. ' 'To see message payload, enable debug logging and retry. ' 'Exception: %s', exc ) log.debug('Msgpack deserialization failure on message: %s', msg) gc.collect() raise six.raise_from( SaltDeserializationError( 'Could not deserialize msgpack message.' ' See log for more info.' ), exc, ) finally: gc.enable() return ret
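Note that the finally: gc.enable() above re-enables collection unconditionally, so a caller that had deliberately disabled GC gets it switched back on behind its back. A small state-preserving guard is sketched below; it is not Salt's API, just an illustration, and json stands in for msgpack so the example needs no third-party package.

import gc
import json
from contextlib import contextmanager

@contextmanager
def gc_paused():
    """Temporarily disable automatic GC, restoring the previous state."""
    was_enabled = gc.isenabled()
    gc.disable()
    try:
        yield
    finally:
        if was_enabled:
            gc.enable()

# Usage: wrap an allocation-heavy deserialization step.
with gc_paused():
    data = json.loads("[" + ",".join(['{"k": 1}'] * 10000) + "]")
print(len(data), "items decoded")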
def majorUpgrade(self, doSave=False): """ """ try: xtime = time.time() wasEnabled = gc.isenabled() if wasEnabled: gc.disable() newRoot = self.newRoot # old to new data object map self.oldToNew = {self.oldRoot: newRoot} # Modify old object hierarchy in place self.massageData() # load all new data before modifying IO map from memops.general.Util import loadAllData loadAllData(newRoot) tt = xtime xtime = time.time() print('+++ Done load new data - time %s' % (xtime - tt)) # check for existing files, move old file as needed, and save oldPath = getDataDir(self.oldRoot, self.oldVersionStr) newPath = getDataDir(newRoot, self.newVersionStr) # save destination tests. NB done regardless to avoid accidental overwrites if oldPath == newPath: # using same name - change old version to '_bak' dest = oldPath + '_bak' if os.path.exists(dest): raise ApiError("Cannot move old data, %s already exists" % dest) # move old data - must be done here to avoid accidental reading instead # files on oldPath location if os.path.exists(oldPath): os.rename(oldPath, dest) # check that new locations are free: if os.path.exists(newPath): raise ApiError("Cannot save new data, %s already exists" % newPath) # Make new data and do minor upgrade self.transferData() finally: if wasEnabled: gc.enable() tt = xtime xtime = time.time() print('+++ Done transfer data - time %s ' % (xtime - tt)) # check validity # NB complete=True checking will be slow - may be relaxed later newRoot.checkAllValid(complete=True) tt = xtime xtime = time.time() print('+++ Done complete checkValid time %s' % (xtime - tt)) if doSave: # save new data newRoot.saveModified() tt = xtime xtime = time.time() print('+++ Done save result - time %s' % (xtime - tt)) # return newRoot
def closest_genes(inputfile=None, outputfile=None, from_region_type=None, no_header=False, nb_neighbors=1, to_region_type=None, same_strandedness=False, diff_strandedness=False, text_format=False, identifier="gene_id", collapse=False): """ Find the n closest genes for each gene. """ if same_strandedness and diff_strandedness: message( "--same-strandedness and --diff-strandedness are " "mutually exclusive.", type="ERROR") # ---------------------------------------------------------------------- # load GTF # ---------------------------------------------------------------------- gtf = GTF(inputfile) gn_gtf = gtf.select_by_key("feature", "gene") gn_ids = gn_gtf.get_gn_ids(nr=True) if len(gn_gtf) == 0: message("No gene feature found. Please use convert_ensembl.", type="ERROR") if nb_neighbors >= (len(gn_gtf) - 1): message("Two much neighbors", type="ERROR") all_ids = gn_gtf.extract_data(identifier, as_list=True, no_na=False) if "." in all_ids: message("Some identifiers are undefined ('.').", type="ERROR") if len(all_ids) == 0: message("The identifier was not found.", type="ERROR") # ---------------------------------------------------------------------- # load GTF and requested regions (for source/'from' transcript) # ---------------------------------------------------------------------- if from_region_type == 'tss': from_regions = gn_gtf.get_5p_end( feat_type="gene", name=[identifier], ).cut([0, 1, 2, 3, 4, 5]).sort() elif from_region_type == 'tts': from_regions = gn_gtf.get_3p_end( feat_type="gene", name=[identifier], ).cut([0, 1, 2, 3, 4, 5]).sort() elif from_region_type == 'gene': from_regions = gn_gtf.to_bed(name=[identifier ], ).cut([0, 1, 2, 3, 4, 5]).sort() else: message("Unknown type.", type="ERROR") # ---------------------------------------------------------------------- # load GTF and requested regions (for dest/'to' transcript) # ---------------------------------------------------------------------- if to_region_type == 'tss': to_regions = gn_gtf.get_5p_end( feat_type="gene", name=[identifier], ).cut([0, 1, 2, 3, 4, 5]).sort() elif to_region_type == 'tts': to_regions = gn_gtf.get_3p_end( feat_type="gene", name=[identifier], ).cut([0, 1, 2, 3, 4, 5]).sort() elif to_region_type == 'gene': to_regions = gn_gtf.to_bed(name=[identifier], ).cut([0, 1, 2, 3, 4, 5]).sort() else: message("Unknown type.", type="ERROR") # ---------------------------------------------------------------------- # Search closest genes # ---------------------------------------------------------------------- gene_closest = defaultdict(list) gene_closest_dist = defaultdict(list) closest_bo = from_regions.closest(b=to_regions, k=nb_neighbors, N=True, s=same_strandedness, S=diff_strandedness, d=True) for i in closest_bo: gene_closest[i[3]] += [i[9]] gene_closest_dist[i[3]] += [i[12]] if not text_format: if len(gene_closest): gtf = gtf.add_attr_from_dict(feat="gene", key=identifier, a_dict=gene_closest, new_key="closest_gn") gtf = gtf.add_attr_from_dict(feat="gene", key=identifier, a_dict=gene_closest_dist, new_key="closest_dist") gtf.write(outputfile, gc_off=True) else: if not no_header: outputfile.write("genes\tclosest_genes\tdistances\n") for gene in gn_ids: if not collapse: outputfile.write("\t".join([ gene, ",".join(gene_closest[gene]), ",".join( gene_closest_dist[gene]) ]) + "\n") else: for closest, dist in zip(gene_closest[gene], gene_closest_dist[gene]): outputfile.write("\t".join([gene, closest, dist]) + "\n") gc.disable() close_properly(outputfile, inputfile)
def sra2illumina(input_file, output_file, tag_read = None, tag='', phred_conversion = False, operation = 'change', tmp_dir = None, size_read_buffer = 10**8): """ It converts the FASTQ file (PHRED-33 qualities and SRA read names) downloaded from Short Read Archive (SRA) to Illumina FASTQ file (PHRED-64 Illumina v1.5 and Illumina read names). """ temp_file = None if phred_conversion: temp_file = give_me_temp_filename(tmp_dir) else: temp_file = output_file read_name = file(input_file,'r').readline().rstrip('\r\n') sra = False e = read_name.partition(" ")[0] if read_name.startswith('@') and ( not(e.endswith('/1') or e.endswith('/2'))): sra = True if operation == 'change' or sra: fid = open(input_file,'r') fod = open(temp_file,'w') i = 0 r = 0 while True: gc.disable() lines = fid.readlines(size_read_buffer) gc.enable() if not lines: break n = len(lines) for j in xrange(n): r = r + 1 i = i + 1 if i == 1: if tag_read: lines[j] = '@%s%s%s\n' % (tag_read ,int2str(r,12) , tag) else: # if there is no tag_read then the original SRA id is left lines[j] = '%s%s\n' % (lines[j][:-1].partition(" ")[0], tag) #lines[j] = lines[j].rstrip('\r\n').upper().split(' ')[1]+tag+'\n' elif i == 3: lines[j] = "+\n" elif i == 4: i = 0 fod.writelines(lines) fid.close() fod.close() if phred_conversion == '64': phred.fq2fq(temp_file,'sanger',output_file,'illumina-1.5',tmp_dir = tmp_dir) os.remove(temp_file) elif phred_conversion == '33': phred.fq2fq(temp_file,'auto-detect',output_file,'sanger',tmp_dir = tmp_dir) os.remove(temp_file) else: print "No changes are done!" if os.path.isfile(output_file): os.remove(output_file) if operation == 'soft': if os.path.islink(input_file): linkto = os.readlink(input_file) os.symlink(linkto,ooutput_file) else: os.symlink(input_file,output_file) elif operation == 'hard': linkto = input_file if os.path.islink(input_file): linkto = os.readlink(input_file) try: os.link(linkto,output_file) except OSError as er: print >>sys.stderr,"WARNING: Cannot do hard links ('%s' and '%s')!" % (linkto,output_file) shutil.copyfile(linkto,output_file) # if er.errno == errno.EXDEV: # # they are on different partitions # # [Errno 18] Invalid cross-device link # shutil.copyfile(linkto,output_file) # else: # print >>sys.stderr,"ERROR: Cannot do hard links ('%s' and '%s')!" % (linkto,output_file) # print >>sys.stderr,er # sys.exit(1) elif operation == 'copy': shutil.copyfile(input_file, output_file) else: print >>sys.stderr, "ERROR: unknown operation of linking!", operation sys.exit(1)
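The conversion loop above pauses GC only around the chunked readlines() call, where a large burst of string objects is created at once. A self-contained sketch of the same pattern, using only the standard library and a hypothetical file path, might look like this:

import gc

def count_records(path, buffer_bytes=10**8):
    """Count FASTQ records (4 lines each), reading the file in large chunks.

    GC is paused only around the chunked readlines() call, mirroring the
    pattern above; the file path is hypothetical.
    """
    records = 0
    with open(path, "r") as fh:
        while True:
            gc.disable()
            try:
                lines = fh.readlines(buffer_bytes)
            finally:
                gc.enable()
            if not lines:
                break
            records += len(lines) // 4
    return records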
def preprocessing(args): gc.enable() # 目的変数 target_name = 'TARGET' #=========================== # 無用なデータを除外(結合前) #=========================== # application_{train|test} df_application_train = pd.read_csv( os.path.join(args.dataset_dir, "application_train.csv")) df_application_test = pd.read_csv( os.path.join(args.dataset_dir, "application_test.csv")) #df_application_train.drop(['FLAG_DOCUMENT_2', 'FLAG_DOCUMENT_4', 'FLAG_DOCUMENT_5', 'FLAG_DOCUMENT_7', 'FLAG_DOCUMENT_9', 'FLAG_DOCUMENT_10', 'FLAG_DOCUMENT_11', 'FLAG_DOCUMENT_12', 'FLAG_DOCUMENT_13', 'FLAG_DOCUMENT_14', 'FLAG_DOCUMENT_15', 'FLAG_DOCUMENT_16', 'FLAG_DOCUMENT_17', 'FLAG_DOCUMENT_18', 'FLAG_DOCUMENT_19', 'FLAG_DOCUMENT_20', 'FLAG_DOCUMENT_21'], axis=1, inplace=True) #df_application_test.drop(['FLAG_DOCUMENT_2', 'FLAG_DOCUMENT_4', 'FLAG_DOCUMENT_5', 'FLAG_DOCUMENT_7', 'FLAG_DOCUMENT_9', 'FLAG_DOCUMENT_10', 'FLAG_DOCUMENT_11', 'FLAG_DOCUMENT_12', 'FLAG_DOCUMENT_13', 'FLAG_DOCUMENT_14', 'FLAG_DOCUMENT_15', 'FLAG_DOCUMENT_16', 'FLAG_DOCUMENT_17', 'FLAG_DOCUMENT_18', 'FLAG_DOCUMENT_19', 'FLAG_DOCUMENT_20', 'FLAG_DOCUMENT_21'], axis=1, inplace=True) #=========================== # サブ構造の結合 #=========================== # 元データ df_train = df_application_train df_test = df_application_test #--------------------------- # bureau #--------------------------- df_bureau = pd.read_csv(os.path.join(args.dataset_dir, "bureau.csv")) for col in df_bureau.columns: # ラベル情報のエンコード if (df_bureau[col].dtypes == "object"): label_encoder = LabelEncoder() label_encoder.fit(list(df_bureau[col])) df_bureau[col] = label_encoder.transform(list(df_bureau[col])) # 同じ SK_ID_CURR の行を 過去の申込み回数(SK_ID_CURR あたりの SK_ID_BUREAU の個数), 各々の特徴量の mean, max, min, で集約する。 df_bureau_agg = df_bureau.groupby('SK_ID_CURR', as_index=False).agg( ['count', 'mean', 'max', 'min']).reset_index() df_bureau_agg.columns = rename_columns_levels(df_bureau_agg, "bureau", 'SK_ID_CURR') #print( df_bureau_agg.shape ) #print( df_bureau_agg.head() ) # 元のデータに統合 df_train = pd.merge(df_train, df_bureau_agg, on='SK_ID_CURR', how='left') df_test = pd.merge(df_test, df_bureau_agg, on='SK_ID_CURR', how='left') # 不要になったメモリを解放 del df_bureau_agg gc.collect() #--------------------------- # bureau_balance #--------------------------- df_bureau_balance = pd.read_csv( os.path.join(args.dataset_dir, "bureau_balance.csv")) for col in df_bureau_balance.columns: # ラベル情報のエンコード if (df_bureau_balance[col].dtypes == "object"): label_encoder = LabelEncoder() label_encoder.fit(list(df_bureau_balance[col])) df_bureau_balance[col] = label_encoder.transform( list(df_bureau_balance[col])) # 同じ SK_ID_BUREAU を集約 df_bureau_balance_agg = df_bureau_balance.groupby( 'SK_ID_BUREAU', as_index=False).agg(['count', 'mean', 'max', 'min']).reset_index() df_bureau_balance_agg.columns = rename_columns_levels( df_bureau_balance_agg, "bureau_balance", 'SK_ID_BUREAU') # 親データ (df_bureau) の 'SK_ID_CURR' に、対応する 'SK_ID_BUREAU' を紐付け df_bureau_balance_agg = df_bureau[['SK_ID_BUREAU', 'SK_ID_CURR' ]].merge(df_bureau_balance_agg, on='SK_ID_BUREAU', how='left') # 1つの `SK_ID_CURR` に対して、複数の `SK_ID_BUREAU` が存在することになるので、`SK_ID_CURR` を集約 df_bureau_balance_agg = df_bureau_balance_agg.drop( columns=['SK_ID_BUREAU']).groupby('SK_ID_CURR', as_index=False).agg( ['count', 'mean', 'max', 'min']).reset_index() df_bureau_balance_agg.columns = rename_columns_levels( df_bureau_balance_agg, "bureau_balance", 'SK_ID_CURR') #print( df_bureau_balance_agg.shape ) #print( df_bureau_balance_agg.head() ) # 元のデータに統合 df_train = pd.merge(df_train, df_bureau_balance_agg, 
on='SK_ID_CURR', how='left') df_test = pd.merge(df_test, df_bureau_balance_agg, on='SK_ID_CURR', how='left') #print( df_train.shape ) #print( df_train.head() ) # 不要になったメモリを解放 del df_bureau, df_bureau_balance, df_bureau_balance_agg gc.collect() #--------------------------- # previous_application #-------------------------- df_previous_application = pd.read_csv( os.path.join(args.dataset_dir, "previous_application.csv")) for col in df_previous_application.columns: # ラベル情報のエンコード if (df_previous_application[col].dtypes == "object"): label_encoder = LabelEncoder() label_encoder.fit(list(df_previous_application[col])) df_previous_application[col] = label_encoder.transform( list(df_previous_application[col])) df_previous_application_agg = df_previous_application.groupby( 'SK_ID_CURR', as_index=False).agg(['count', 'mean', 'max', 'min']).reset_index() df_previous_application_agg.columns = rename_columns_levels( df_previous_application_agg, "revious_application", 'SK_ID_CURR') # 元データに統合 df_train = pd.merge(df_train, df_previous_application_agg, on='SK_ID_CURR', how='left') df_test = pd.merge(df_test, df_previous_application_agg, on='SK_ID_CURR', how='left') # 不要になったメモリを解放 del df_previous_application_agg gc.collect() #--------------------------- # pos_cash_balance #--------------------------- df_pos_cash_balance = pd.read_csv( os.path.join(args.dataset_dir, "POS_CASH_balance.csv")) for col in df_pos_cash_balance.columns: # ラベル情報のエンコード if (df_pos_cash_balance[col].dtypes == "object"): label_encoder = LabelEncoder() label_encoder.fit(list(df_pos_cash_balance[col])) df_pos_cash_balance[col] = label_encoder.transform( list(df_pos_cash_balance[col])) df_pos_cash_balance_agg = df_pos_cash_balance.groupby( 'SK_ID_PREV', as_index=False).agg(['count', 'mean', 'max', 'min']).reset_index() #print( df_pos_cash_balance_agg.head() ) df_pos_cash_balance_agg.columns = rename_columns_levels( df_pos_cash_balance_agg, "pos_cash_balance", 'SK_ID_PREV') # 親データの 'SK_ID_CURR' に、対応する 'SK_ID_PREV' を紐付け df_pos_cash_balance_agg = df_previous_application[[ 'SK_ID_PREV', 'SK_ID_CURR' ]].merge(df_pos_cash_balance_agg, on='SK_ID_PREV', how='left') #print( df_pos_cash_balance_agg.head() ) # 1つの `SK_ID_CURR` に対して、複数の `SK_ID_PREV` が存在することになるので、`SK_ID_CURR` を集約 df_pos_cash_balance_agg = df_pos_cash_balance_agg.drop( columns=['SK_ID_PREV']).groupby('SK_ID_CURR', as_index=False).agg( ['count', 'mean', 'max', 'min']).reset_index() df_pos_cash_balance_agg.columns = rename_columns_levels( df_pos_cash_balance_agg, "bureau_balance", 'SK_ID_CURR') #print( df_pos_cash_balance_agg.head() ) # 元データに統合 df_train = pd.merge(df_train, df_pos_cash_balance_agg, on='SK_ID_CURR', how='left') df_test = pd.merge(df_test, df_pos_cash_balance_agg, on='SK_ID_CURR', how='left') # 不要になったメモリを解放 del df_pos_cash_balance, df_pos_cash_balance_agg gc.collect() #--------------------------- # installments_payments #--------------------------- df_installments_payments = pd.read_csv( os.path.join(args.dataset_dir, "installments_payments.csv")) for col in df_installments_payments.columns: # ラベル情報のエンコード if (df_installments_payments[col].dtypes == "object"): label_encoder = LabelEncoder() label_encoder.fit(list(df_installments_payments[col])) df_installments_payments[col] = label_encoder.transform( list(df_installments_payments[col])) df_installments_payments_agg = df_installments_payments.groupby( 'SK_ID_PREV', as_index=False).agg(['count', 'mean', 'max', 'min']).reset_index() df_installments_payments_agg.columns = rename_columns_levels( df_installments_payments_agg, 
"installments_payments", 'SK_ID_PREV') # 親データの 'SK_ID_CURR' に、対応する 'SK_ID_PREV' を紐付け df_installments_payments_agg = df_previous_application[[ 'SK_ID_PREV', 'SK_ID_CURR' ]].merge(df_installments_payments_agg, on='SK_ID_PREV', how='left') # 1つの `SK_ID_CURR` に対して、複数の `SK_ID_PREV` が存在することになるので、`SK_ID_CURR` を集約 df_installments_payments_agg = df_installments_payments_agg.drop( columns=['SK_ID_PREV']).groupby('SK_ID_CURR', as_index=False).agg( ['count', 'mean', 'max', 'min']).reset_index() df_installments_payments_agg.columns = rename_columns_levels( df_installments_payments_agg, "installments_payments", 'SK_ID_CURR') # 元データに統合 df_train = pd.merge(df_train, df_installments_payments_agg, on='SK_ID_CURR', how='left') df_test = pd.merge(df_test, df_installments_payments_agg, on='SK_ID_CURR', how='left') # 不要になったメモリを解放 del df_installments_payments, df_installments_payments_agg gc.collect() #--------------------------- # credit_card_balance #--------------------------- df_credit_card_balance = pd.read_csv( os.path.join(args.dataset_dir, "credit_card_balance.csv")) for col in df_credit_card_balance.columns: # ラベル情報のエンコード if (df_credit_card_balance[col].dtypes == "object"): label_encoder = LabelEncoder() label_encoder.fit(list(df_credit_card_balance[col])) df_credit_card_balance[col] = label_encoder.transform( list(df_credit_card_balance[col])) df_credit_card_balance_agg = df_credit_card_balance.groupby( 'SK_ID_PREV', as_index=False).agg(['count', 'mean', 'max', 'min']).reset_index() df_credit_card_balance_agg.columns = rename_columns_levels( df_credit_card_balance_agg, "credit_card_balance", 'SK_ID_PREV') # 親データの 'SK_ID_CURR' に、対応する 'SK_ID_PREV' を紐付け df_credit_card_balance_agg = df_previous_application[[ 'SK_ID_PREV', 'SK_ID_CURR' ]].merge(df_credit_card_balance_agg, on='SK_ID_PREV', how='left') # 1つの `SK_ID_CURR` に対して、複数の `SK_ID_PREV` が存在することになるので、`SK_ID_CURR` を集約 df_credit_card_balance_agg = df_credit_card_balance_agg.drop( columns=['SK_ID_PREV']).groupby('SK_ID_CURR', as_index=False).agg( ['count', 'mean', 'max', 'min']).reset_index() df_credit_card_balance_agg.columns = rename_columns_levels( df_credit_card_balance_agg, "installments_payments", 'SK_ID_CURR') # 元データに統合 df_train = pd.merge(df_train, df_credit_card_balance_agg, on='SK_ID_CURR', how='left') df_test = pd.merge(df_test, df_credit_card_balance_agg, on='SK_ID_CURR', how='left') # 不要になったメモリを解放 del df_credit_card_balance, df_credit_card_balance_agg gc.collect() #=========================== # 特徴量の追加(結合後) #=========================== df_train[ 'DAYS_EMPLOYED_ANOM'] = df_train["DAYS_EMPLOYED"] == 365243 # 異常値のフラグ df_train['DAYS_EMPLOYED'].replace({365243: np.nan}, inplace=True) df_test[ 'DAYS_EMPLOYED_ANOM'] = df_test["DAYS_EMPLOYED"] == 365243 # 異常値のフラグ df_test['DAYS_EMPLOYED'].replace({365243: np.nan}, inplace=True) # 時系列データ df_train['DAYS_BIRTH'] = -1 * df_train['DAYS_BIRTH'] df_test['DAYS_BIRTH'] = -1 * df_test['DAYS_BIRTH'] df_train['YEARS_BIRTH'] = df_train['DAYS_BIRTH'] / 365 df_test['YEARS_BIRTH'] = df_test['DAYS_BIRTH'] / 365 #df_train['YEARS_BINNED'] = pd.cut(df_train['YEARS_BIRTH'], bins = np.linspace(20, 70, num = 11)) #df_test['YEARS_BINNED'] = pd.cut(df_test['YEARS_BIRTH'], bins = np.linspace(20, 70, num = 11)) #=========================== # 無用なデータを除外(結合後) #=========================== if 'SK_ID_CURR' in df_train.columns: df_train.drop(['SK_ID_CURR'], axis=1, inplace=True) df_test.drop(['SK_ID_CURR'], axis=1, inplace=True) if 'SK_ID_BUREAU' in df_train.columns: df_train.drop(['SK_ID_BUREAU'], axis=1, inplace=True) 
df_test.drop(['SK_ID_BUREAU'], axis=1, inplace=True) if 'SK_ID_PREV' in df_train.columns: df_train.drop(['SK_ID_PREV'], axis=1, inplace=True) df_test.drop(['SK_ID_PREV'], axis=1, inplace=True) #=========================== # 全特徴量を一括で処理 #=========================== # 全データセット df_data = pd.concat([df_train, df_test], sort=False) for col in df_train.columns: # 目的変数 if (col in [target_name]): continue #----------------------------- # ラベル情報のエンコード #----------------------------- if (df_train[col].dtypes == "object"): label_encoder = LabelEncoder() label_encoder.fit(list(df_data[col])) df_train[col] = label_encoder.transform(list(df_train[col])) label_encoder = LabelEncoder() label_encoder.fit(list(df_data[col])) df_test[col] = label_encoder.transform(list(df_test[col])) #----------------------------- # 欠損値の埋め合わせ #----------------------------- # NAN 値の埋め合わせ(平均値) if (col in ["OWN_CAR_AGE"]): # データセット全体 df_data での平均値とする df_train[col].fillna(np.mean(df_data[col]), inplace=True) df_test[col].fillna(np.mean(df_data[col]), inplace=True) # NAN 値の埋め合わせ(ゼロ値)/ int 型 elif (df_train[col].dtypes in [ "int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64" ]): df_train[col].fillna(0, inplace=True) df_test[col].fillna(0, inplace=True) # NAN 値の埋め合わせ(ゼロ値)/ float 型 elif (df_train[col].dtypes in ["float16", "float32", "float64", "float128"]): df_train[col].fillna(0.0, inplace=True) df_test[col].fillna(0.0, inplace=True) # NAN 値の補完(None値)/ object 型 else: df_train[col] = df_train[col].fillna('NA') df_test[col] = df_test[col].fillna('NA') #----------------------------- # 正規化処理 #----------------------------- """ if( df_train[col].dtypes in ["float16", "float32", "float64", "float128"] ): scaler = StandardScaler() scaler.fit( df_train[col].values.reshape(-1,1) ) df_train[col] = scaler.transform( df_train[col].values.reshape(-1,1) ) df_test[col] = scaler.transform( df_test[col].values.reshape(-1,1) ) """ #----------------------------- # 値が単一の特徴量をクレンジング #----------------------------- """ if( df_train[col].nunique() == 1 ): print( "remove {} : {}".format(col,df_train[col].nunique()) ) df_train.drop([col], axis=1, inplace=True) df_test.drop([col], axis=1, inplace=True) """ gc.disable() return df_train, df_test
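Throughout the preprocessing above, intermediate DataFrames are released with del followed by gc.collect(). del drops the last reference, which normally frees the memory straight away via reference counting; the explicit collect additionally reclaims anything caught in a reference cycle, which matters when automatic collection has been turned off or made infrequent. A stdlib-only sketch of the idea (plain dicts instead of DataFrames, illustrative sizes):

import gc

def build_intermediate(n=100000):
    # stand-in for an aggregated table that is only needed briefly
    return [{"id": i, "value": i * 0.5} for i in range(n)]

merged = []
for _ in range(3):
    intermediate = build_intermediate()
    merged.extend(row["value"] for row in intermediate)
    # Drop the last reference so reference counting can free it, then
    # collect explicitly in case anything ended up in a cycle.
    del intermediate
    gc.collect()

print(len(merged), "values aggregated")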
def dictionaryAttack(attack_dict): suffix_list = [""] custom_wordlist = [""] hash_regexes = [] results = [] resumes = [] user_hash = [] processException = False foundHash = False for (_, hashes) in attack_dict.items(): for hash_ in hashes: if not hash_: continue hash_ = hash_.split()[0] if hash_ and hash_.strip() else hash_ regex = hashRecognition(hash_) if regex and regex not in hash_regexes: hash_regexes.append(regex) infoMsg = "using hash method '%s'" % __functions__[ regex].func_name logger.info(infoMsg) for hash_regex in hash_regexes: keys = set() attack_info = [] for (user, hashes) in attack_dict.items(): for hash_ in hashes: if not hash_: continue foundHash = True hash_ = hash_.split()[0] if hash_ and hash_.strip() else hash_ if re.match(hash_regex, hash_): item = None if hash_regex not in (HASH.CRYPT_GENERIC, HASH.WORDPRESS): hash_ = hash_.lower() if hash_regex in (HASH.MYSQL, HASH.MYSQL_OLD, HASH.MD5_GENERIC, HASH.SHA1_GENERIC): item = [(user, hash_), {}] elif hash_regex in (HASH.ORACLE_OLD, HASH.POSTGRES): item = [(user, hash_), {'username': user}] elif hash_regex in (HASH.ORACLE, ): item = [(user, hash_), {'salt': hash_[-20:]}] elif hash_regex in (HASH.MSSQL, HASH.MSSQL_OLD, HASH.MSSQL_NEW): item = [(user, hash_), {'salt': hash_[6:14]}] elif hash_regex in (HASH.CRYPT_GENERIC, ): item = [(user, hash_), {'salt': hash_[0:2]}] elif hash_regex in (HASH.WORDPRESS, ): if ITOA64.index(hash_[3]) < 32: item = [(user, hash_), { 'salt': hash_[4:12], 'count': 1 << ITOA64.index(hash_[3]), 'prefix': hash_[:12] }] else: warnMsg = "invalid hash '%s'" % hash_ logger.warn(warnMsg) if item and hash_ not in keys: resumed = hashDBRetrieve(hash_) if not resumed: attack_info.append(item) user_hash.append(item[0]) else: infoMsg = "resuming password '%s' for hash '%s'" % ( resumed, hash_) if user and not user.startswith(DUMMY_USER_PREFIX): infoMsg += " for user '%s'" % user logger.info(infoMsg) resumes.append((user, hash_, resumed)) keys.add(hash_) if not attack_info: continue if not kb.wordlists: while not kb.wordlists: # the slowest of all methods hence smaller default dict if hash_regex in (HASH.ORACLE_OLD, HASH.WORDPRESS): dictPaths = [paths.SMALL_DICT] else: dictPaths = [paths.WORDLIST] message = "what dictionary do you want to use?\n" message += "[1] default dictionary file '%s' (press Enter)\n" % dictPaths[ 0] message += "[2] custom dictionary file\n" message += "[3] file with list of dictionary files" choice = readInput(message, default='1') try: if choice == '2': message = "what's the custom dictionary's location?\n" _ = readInput(message) if _: dictPaths = [readInput(message)] logger.info("using custom dictionary") elif choice == '3': message = "what's the list file location?\n" listPath = readInput(message) checkFile(listPath) dictPaths = getFileItems(listPath) logger.info("using custom list of dictionaries") else: logger.info("using default dictionary") dictPaths = filter(None, dictPaths) for dictPath in dictPaths: checkFile(dictPath) if os.path.splitext(dictPath)[1].lower() == ".zip": _ = zipfile.ZipFile(dictPath, 'r') if len(_.namelist()) == 0: errMsg = "no file(s) inside '%s'" % dictPath raise SqlmapDataException(errMsg) else: _.open(_.namelist()[0]) kb.wordlists = dictPaths except Exception, ex: warnMsg = "there was a problem while loading dictionaries" warnMsg += " ('%s')" % getSafeExString(ex) logger.critical(warnMsg) message = "do you want to use common password suffixes? (slow!) 
[y/N] " if readInput(message, default='N', boolean=True): suffix_list += COMMON_PASSWORD_SUFFIXES infoMsg = "starting dictionary-based cracking (%s)" % __functions__[ hash_regex].func_name logger.info(infoMsg) for item in attack_info: ((user, _), _) = item if user and not user.startswith(DUMMY_USER_PREFIX): custom_wordlist.append(normalizeUnicode(user)) if hash_regex in (HASH.MYSQL, HASH.MYSQL_OLD, HASH.MD5_GENERIC, HASH.SHA1_GENERIC): for suffix in suffix_list: if not attack_info or processException: break if suffix: clearConsoleLine() infoMsg = "using suffix '%s'" % suffix logger.info(infoMsg) retVal = None processes = [] try: if _multiprocessing: if _multiprocessing.cpu_count() > 1: infoMsg = "starting %d processes " % _multiprocessing.cpu_count( ) singleTimeLogMessage(infoMsg) gc.disable() retVal = _multiprocessing.Queue() count = _multiprocessing.Value( 'i', _multiprocessing.cpu_count()) for i in xrange(_multiprocessing.cpu_count()): process = _multiprocessing.Process( target=_bruteProcessVariantA, args=(attack_info, hash_regex, suffix, retVal, i, count, kb.wordlists, custom_wordlist, conf.api)) processes.append(process) for process in processes: process.daemon = True process.start() while count.value > 0: time.sleep(0.5) else: warnMsg = "multiprocessing hash cracking is currently " warnMsg += "not supported on this platform" singleTimeWarnMessage(warnMsg) retVal = Queue() _bruteProcessVariantA(attack_info, hash_regex, suffix, retVal, 0, 1, kb.wordlists, custom_wordlist, conf.api) except KeyboardInterrupt: print processException = True warnMsg = "user aborted during dictionary-based attack phase (Ctrl+C was pressed)" logger.warn(warnMsg) for process in processes: try: process.terminate() process.join() except (OSError, AttributeError): pass finally: if _multiprocessing: gc.enable() if retVal: conf.hashDB.beginTransaction() while not retVal.empty(): user, hash_, word = item = retVal.get(block=False) attack_info = filter( lambda _: _[0][0] != user or _[0][1] != hash_, attack_info) hashDBWrite(hash_, word) results.append(item) conf.hashDB.endTransaction() clearConsoleLine() else: for ((user, hash_), kwargs) in attack_info: if processException: break if any(_[0] == user and _[1] == hash_ for _ in results): continue count = 0 found = False for suffix in suffix_list: if found or processException: break if suffix: clearConsoleLine() infoMsg = "using suffix '%s'" % suffix logger.info(infoMsg) retVal = None processes = [] try: if _multiprocessing: if _multiprocessing.cpu_count() > 1: infoMsg = "starting %d processes " % _multiprocessing.cpu_count( ) singleTimeLogMessage(infoMsg) gc.disable() retVal = _multiprocessing.Queue() found_ = _multiprocessing.Value('i', False) count = _multiprocessing.Value( 'i', _multiprocessing.cpu_count()) for i in xrange(_multiprocessing.cpu_count()): process = _multiprocessing.Process( target=_bruteProcessVariantB, args=(user, hash_, kwargs, hash_regex, suffix, retVal, found_, i, count, kb.wordlists, custom_wordlist, conf.api)) processes.append(process) for process in processes: process.daemon = True process.start() while count.value > 0: time.sleep(0.5) found = found_.value != 0 else: warnMsg = "multiprocessing hash cracking is currently " warnMsg += "not supported on this platform" singleTimeWarnMessage(warnMsg) class Value(): pass retVal = Queue() found_ = Value() found_.value = False _bruteProcessVariantB(user, hash_, kwargs, hash_regex, suffix, retVal, found_, 0, 1, kb.wordlists, custom_wordlist, conf.api) found = found_.value except KeyboardInterrupt: print 
processException = True warnMsg = "user aborted during dictionary-based attack phase (Ctrl+C was pressed)" logger.warn(warnMsg) for process in processes: try: process.terminate() process.join() except (OSError, AttributeError): pass finally: if _multiprocessing: gc.enable() if retVal: conf.hashDB.beginTransaction() while not retVal.empty(): user, hash_, word = item = retVal.get( block=False) hashDBWrite(hash_, word) results.append(item) conf.hashDB.endTransaction() clearConsoleLine()
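The cracking code above wraps worker start-up in gc.disable()/gc.enable() so the collector stays quiet while child processes are forked and fed. A much smaller, self-contained sketch of that pattern is below; worker() and the word list are hypothetical and unrelated to sqlmap's real brute-force variants.

import gc
import multiprocessing

def worker(word_queue, result_queue):
    # hypothetical stand-in for the real brute-force variants
    while True:
        word = word_queue.get()
        if word is None:
            break
        result_queue.put(word.upper())

def run_workers(words):
    word_queue = multiprocessing.Queue()
    result_queue = multiprocessing.Queue()
    gc.disable()   # avoid collector activity while child processes start
    try:
        procs = [multiprocessing.Process(target=worker,
                                         args=(word_queue, result_queue))
                 for _ in range(multiprocessing.cpu_count())]
        for p in procs:
            p.daemon = True
            p.start()
        for w in words:
            word_queue.put(w)
        for _ in procs:
            word_queue.put(None)       # one stop sentinel per worker
        results = [result_queue.get() for _ in words]
        for p in procs:
            p.join()
        return results
    finally:
        gc.enable()

if __name__ == "__main__":
    print(run_workers(["alpha", "beta", "gamma"]))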
Parse function for setuptools_scm that ignores tags for non-C++ subprojects, e.g. apache-arrow-js-XXX tags. """ from setuptools_scm.git import parse kwargs['describe_command'] = \ "git describe --dirty --tags --long --match 'apache-arrow-[0-9].*'" return parse(root, **kwargs) __version__ = setuptools_scm.get_version('../', parse=parse_git) except ImportError: __version__ = None # ARROW-8684: Disable GC while initializing Cython extension module, # to workaround Cython bug in https://github.com/cython/cython/issues/3603 _gc_enabled = _gc.isenabled() _gc.disable() import pyarrow.lib as _lib if _gc_enabled: _gc.enable() from pyarrow.lib import cpu_count, set_cpu_count from pyarrow.lib import ( null, bool_, int8, int16, int32, int64, uint8, uint16, uint32, uint64, time32, time64, timestamp, date32, date64, duration, float16, float32, float64, binary, string, utf8, large_binary, large_string, large_utf8, decimal128, list_, large_list, map_, struct, union, dictionary, field, type_for_alias, DataType, DictionaryType, StructType, ListType, LargeListType, MapType, FixedSizeListType, UnionType, TimestampType, Time32Type, Time64Type, DurationType, FixedSizeBinaryType, Decimal128Type, BaseExtensionType, ExtensionType, PyExtensionType, UnknownExtensionType, register_extension_type, unregister_extension_type, DictionaryMemo,
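The pyarrow guard above saves the collector state, disables it around importing the Cython extension module, and restores it afterwards. The same guard can be written as a small helper for any import that is known to misbehave if a collection runs part-way through its initialisation; the sketch below is an assumption about how one might package it, not pyarrow's API.

import gc
import importlib

def import_with_gc_paused(module_name):
    """Import a module with automatic GC paused, restoring the prior state."""
    was_enabled = gc.isenabled()
    gc.disable()
    try:
        return importlib.import_module(module_name)
    finally:
        if was_enabled:
            gc.enable()

# Usage with an ordinary stdlib module, purely as a demonstration.
json = import_with_gc_paused("json")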
def main():
    print "\tCreating 1 unreachable object => should be destroyed automatically"
    make_unreachable_object()
    print "\tCollect...=> no unreachable or uncollectable should be visible"
    collected = gc.collect(0)
    assert collected == 0
    assert gc.garbage == []

    print "\tCreating 1 unreachable object with ref cycles"
    make_unreachable_object_with_ref_cycles()
    print "\tCollect...=> 1 unreachable and 0 uncollectable should be visible"
    collected = gc.collect(0)
    assert collected == 1
    assert gc.garbage == []

    print "\tCreating 1 uncollectable object"
    make_uncollectable_object()
    print "\tCollect...=> Unreachables and uncollectables should be visible"
    collected = gc.collect(0)
    assert collected != 0
    assert gc.garbage != []

if __name__ == "__main__":
    gc.disable()  # disable automatic garbage collection
    gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE | gc.DEBUG_UNCOLLECTABLE |
                 gc.DEBUG_INSTANCES | gc.DEBUG_OBJECTS)  # activate gc debug flags
    main()
    gc.set_debug(0)  # deactivate gc debug flags
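gc.DEBUG_INSTANCES and gc.DEBUG_OBJECTS only exist on Python 2; the remaining flags survive in Python 3. A Python 3 sketch of the same kind of experiment, with GC disabled so the collection happens exactly when we ask for it, could look like this (the reported object counts depend on the interpreter version):

import gc

class Node:
    def __init__(self):
        self.ref = None

def make_cycle():
    a, b = Node(), Node()
    a.ref, b.ref = b, a   # two objects kept alive only by each other

gc.disable()
# DEBUG_INSTANCES / DEBUG_OBJECTS were removed in Python 3; DEBUG_STATS,
# DEBUG_COLLECTABLE and DEBUG_UNCOLLECTABLE still exist.
gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE | gc.DEBUG_UNCOLLECTABLE)

make_cycle()
collected = gc.collect(0)       # the cycle is unreachable, so gen-0 reclaims it
print("collected:", collected)  # typically the two Nodes plus their attribute dicts

gc.set_debug(0)
gc.enable()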
def controlsd_thread(sm=None, pm=None, can_sock=None): gc.disable() # start the loop set_realtime_priority(3) params = Params() is_metric = params.get("IsMetric", encoding='utf8') == "1" is_ldw_enabled = params.get("IsLdwEnabled", encoding='utf8') == "1" passive = params.get("Passive", encoding='utf8') == "1" openpilot_enabled_toggle = params.get("OpenpilotEnabledToggle", encoding='utf8') == "1" community_feature_toggle = params.get("CommunityFeaturesToggle", encoding='utf8') == "1" passive = passive or not openpilot_enabled_toggle # Pub/Sub Sockets if pm is None: pm = messaging.PubMaster(['sendcan', 'controlsState', 'carState', 'carControl', 'carEvents', 'carParams']) if sm is None: sm = messaging.SubMaster(['thermal', 'health', 'liveCalibration', 'driverMonitoring', 'plan', 'pathPlan', \ 'model', 'gpsLocation'], ignore_alive=['gpsLocation']) if can_sock is None: can_timeout = None if os.environ.get('NO_CAN_TIMEOUT', False) else 100 can_sock = messaging.sub_sock('can', timeout=can_timeout) # wait for health and CAN packets hw_type = messaging.recv_one(sm.sock['health']).health.hwType has_relay = hw_type in [HwType.blackPanda, HwType.uno] print("Waiting for CAN messages...") messaging.get_one_can(can_sock) CI, CP = get_car(can_sock, pm.sock['sendcan'], has_relay) car_recognized = CP.carName != 'mock' # If stock camera is disconnected, we loaded car controls and it's not chffrplus controller_available = CP.enableCamera and CI.CC is not None and not passive community_feature_disallowed = CP.communityFeature and not community_feature_toggle read_only = not car_recognized or not controller_available or CP.dashcamOnly or community_feature_disallowed if read_only: CP.safetyModel = car.CarParams.SafetyModel.noOutput # Write CarParams for radard and boardd safety mode params.put("CarParams", CP.to_bytes()) params.put("LongitudinalControl", "1" if CP.openpilotLongitudinalControl else "0") CC = car.CarControl.new_message() AM = AlertManager() startup_alert = get_startup_alert(car_recognized, controller_available) AM.add(sm.frame, startup_alert, False) LoC = LongControl(CP, CI.compute_gb) VM = VehicleModel(CP) if CP.lateralTuning.which() == 'pid': LaC = LatControlPID(CP) elif CP.lateralTuning.which() == 'indi': LaC = LatControlINDI(CP) elif CP.lateralTuning.which() == 'lqr': LaC = LatControlLQR(CP) driver_status = DriverStatus() is_rhd = params.get("IsRHD") if is_rhd is not None: driver_status.is_rhd = bool(int(is_rhd)) state = State.disabled soft_disable_timer = 0 v_cruise_kph = 255 v_cruise_kph_last = 0 mismatch_counter = 0 last_blinker_frame = 0 events_prev = [] sm['liveCalibration'].calStatus = Calibration.INVALID sm['pathPlan'].sensorValid = True sm['pathPlan'].posenetValid = True sm['thermal'].freeSpace = 1. 
# detect sound card presence sounds_available = not os.path.isfile('/EON') or (os.path.isdir('/proc/asound/card0') and open('/proc/asound/card0/state').read().strip() == 'ONLINE') # controlsd is driven by can recv, expected at 100Hz rk = Ratekeeper(100, print_delay_threshold=None) internet_needed = params.get("Offroad_ConnectivityNeeded", encoding='utf8') is not None prof = Profiler(False) # off by default while True: start_time = sec_since_boot() prof.checkpoint("Ratekeeper", ignore=True) # Sample data and compute car events CS, events, cal_perc, mismatch_counter = data_sample(CI, CC, sm, can_sock, driver_status, state, mismatch_counter, params) prof.checkpoint("Sample") # Create alerts if not sm.all_alive_and_valid(): events.append(create_event('commIssue', [ET.NO_ENTRY, ET.SOFT_DISABLE])) if not sm['pathPlan'].mpcSolutionValid: events.append(create_event('plannerError', [ET.NO_ENTRY, ET.IMMEDIATE_DISABLE])) if not sm['pathPlan'].sensorValid: events.append(create_event('sensorDataInvalid', [ET.NO_ENTRY, ET.PERMANENT])) if not sm['pathPlan'].paramsValid: events.append(create_event('vehicleModelInvalid', [ET.WARNING])) if not sm['pathPlan'].posenetValid: events.append(create_event('posenetInvalid', [ET.NO_ENTRY, ET.WARNING])) if not sm['plan'].radarValid: events.append(create_event('radarFault', [ET.NO_ENTRY, ET.SOFT_DISABLE])) if sm['plan'].radarCanError: events.append(create_event('radarCanError', [ET.NO_ENTRY, ET.SOFT_DISABLE])) if not CS.canValid: events.append(create_event('canError', [ET.NO_ENTRY, ET.IMMEDIATE_DISABLE])) if not sounds_available: events.append(create_event('soundsUnavailable', [ET.NO_ENTRY, ET.PERMANENT])) if internet_needed: events.append(create_event('internetConnectivityNeeded', [ET.NO_ENTRY, ET.PERMANENT])) if community_feature_disallowed: events.append(create_event('communityFeatureDisallowed', [ET.PERMANENT])) # Only allow engagement with brake pressed when stopped behind another stopped car if CS.brakePressed and sm['plan'].vTargetFuture >= STARTING_TARGET_SPEED and not CP.radarOffCan and CS.vEgo < 0.3: events.append(create_event('noTarget', [ET.NO_ENTRY, ET.IMMEDIATE_DISABLE])) if not read_only: # update control state state, soft_disable_timer, v_cruise_kph, v_cruise_kph_last = \ state_transition(sm.frame, CS, CP, state, events, soft_disable_timer, v_cruise_kph, AM) prof.checkpoint("State transition") # Compute actuators (runs PID loops and lateral MPC) actuators, v_cruise_kph, driver_status, v_acc, a_acc, lac_log, last_blinker_frame = \ state_control(sm.frame, sm.rcv_frame, sm['plan'], sm['pathPlan'], CS, CP, state, events, v_cruise_kph, v_cruise_kph_last, AM, rk, driver_status, LaC, LoC, read_only, is_metric, cal_perc, last_blinker_frame) prof.checkpoint("State Control") # Publish data CC, events_prev = data_send(sm, pm, CS, CI, CP, VM, state, events, actuators, v_cruise_kph, rk, AM, driver_status, LaC, LoC, read_only, start_time, v_acc, a_acc, lac_log, events_prev, last_blinker_frame, is_ldw_enabled) prof.checkpoint("Sent") rk.monitor_time() prof.display()
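For long-lived daemons on Python 3.7+, gc.freeze() offers a middle ground that none of the snippets here use: collect once after start-up, then move every surviving object into the permanent generation so later collections no longer rescan them. Sketch, offered as an aside rather than as what controlsd does:

import gc

# ... start-up: import modules, build long-lived objects ...
startup_state = {"params": list(range(10000))}

gc.collect()   # clean up start-up garbage once
gc.freeze()    # move surviving objects to the permanent generation (3.7+)

print("frozen objects:", gc.get_freeze_count())

# The main loop now only pays for objects allocated after the freeze.
# gc.unfreeze() moves everything back into the oldest generation if needed.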