def __exit__(self, *args):
    self.end = self.timer()
    if self.disable_gc and self.gc_state:
        gc.enable()
    self.interval = self.end - self.start
    if self.verbose:
        print('time taken: %f seconds' % self.interval)
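This `__exit__` is only a fragment; a minimal sketch of the enclosing timer context manager it implies (attribute names are taken from the method above, everything else is an assumption):

import gc
import timeit

class Timer(object):
    # hypothetical enclosing class; only __exit__ appears in the example above
    def __init__(self, disable_gc=True, verbose=False, timer=timeit.default_timer):
        self.disable_gc = disable_gc
        self.verbose = verbose
        self.timer = timer

    def __enter__(self):
        # remember whether GC was enabled so __exit__ only re-enables it in that case
        self.gc_state = gc.isenabled()
        if self.disable_gc and self.gc_state:
            gc.disable()
        self.start = self.timer()
        return self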
Example #2
    def test(self, view):
        """
        Calls the given view and measures the time for it to return. The
        garbage collector is disabled during execution.
        """
        gc_old = gc.isenabled()
        gc.disable()
        try:
            start = timeit.default_timer()
            if view.method == 'GET':
                response = self.client.get(view.url, view.data)
            elif view.method == 'POST':
                response = self.client.post(view.url, view.data)
            else:
                raise ValueError('Unknown view method: %s' % view.method)

            end = timeit.default_timer()
            # Return result in milliseconds
            time_ms = (end - start) * 1000
            # Try to get version information
            version = subprocess.check_output(['git', 'describe'])

            from .models import TestResult
            return TestResult(view=view, time=time_ms, result=response,
                              result_code=response.status_code, version=version)
        finally:
            if gc_old:
                gc.enable()
Example #3
def getlines(a_filename):
    # reads the file in large chunks; yields the dict of @SQ header entries first, then each tab-split line
    fin = None
    if a_filename == '-':
        fin = sys.stdin
    else:
        fin = open(a_filename,'r')
    header = dict()
    first = True
    while True:
        lines = fin.readlines(10**8)
        if not lines:
            break
        gc.disable()
        lines = [line.rstrip('\r\n').split('\t') for line in lines if line.rstrip('\r\n')]
        gc.enable()
        for line in lines:
            if line[0].startswith('@'):
                if line[0].startswith('@SQ') and line[1].startswith('SN:') and line[2].startswith('LN:'):
                    k = line[1][3:]
                    v = int(line[2][3:])
                    header[k] = v
                else:
                    pass
            else:
                if first:
                    first = False
                    yield header
                    header = None
                yield line
    if first and header:
        yield header
    fin.close()
Example #4
def __exit__(self, exc_type, exc_value, tb):
    gc.collect()
    new_objects = len(gc.get_objects())
    if new_objects > self.old_objects:
        pytest.fail('Example code leaked')
    _gc_lock.release()
    gc.enable()
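As above, this `__exit__` belongs to a larger context manager; a sketch of the matching `__enter__`, assuming `_gc_lock` is a module-level `threading.Lock` (the names come from the code above, the body is an assumption):

import gc
import threading

_gc_lock = threading.Lock()     # assumed: serializes leak checks across tests

class LeakChecker(object):      # hypothetical name for the enclosing class
    def __enter__(self):
        _gc_lock.acquire()
        gc.disable()            # keep collections from changing the count mid-block
        gc.collect()
        self.old_objects = len(gc.get_objects())
        return self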
    def test_trashcan(self):
        class Ouch:
            n = 0
            def __del__(self):
                Ouch.n = Ouch.n + 1
                if Ouch.n % 17 == 0:
                    gc.collect()

        # "trashcan" is a hack to prevent stack overflow when deallocating
        # very deeply nested tuples etc.  It works in part by abusing the
        # type pointer and refcount fields, and that can yield horrible
        # problems when gc tries to traverse the structures.
        # If this test fails (as it does in 2.0, 2.1 and 2.2), it will
        # most likely die via segfault.

        # Note:  In 2.3 the possibility for compiling without cyclic gc was
        # removed, and that in turn allows the trashcan mechanism to work
        # via much simpler means (e.g., it never abuses the type pointer or
        # refcount fields anymore).  Since it's much less likely to cause a
        # problem now, the various constants in this expensive (we force a lot
        # of full collections) test are cut back from the 2.2 version.
        gc.enable()
        N = 150
        for count in range(2):
            t = []
            for i in range(N):
                t = [t, Ouch()]
            u = []
            for i in range(N):
                u = [u, Ouch()]
            v = {}
            for i in range(N):
                v = {1: v, 2: Ouch()}
        gc.disable()
Example #6
def load(self):
    try:
        env = Environment.Environment(os.path.join(self.cachedir, "build.config.py"))
    except (IOError, OSError):
        pass
    else:
        if env["version"] < HEXVERSION:
            raise Utils.WafError("Version mismatch! reconfigure the project")
        for t in env["tools"]:
            self.setup(**t)
    try:
        gc.disable()
        f = data = None
        Node.Nodu = self.node_class
        try:
            f = open(os.path.join(self.bdir, DBFILE), "rb")
        except (IOError, EOFError):
            pass
        try:
            if f:
                data = cPickle.load(f)
        except AttributeError:
            if Logs.verbose > 1:
                raise
        if data:
            for x in SAVED_ATTRS:
                setattr(self, x, data[x])
        else:
            debug("build: Build cache loading failed")
    finally:
        if f:
            f.close()
        gc.enable()
Example #7
def main_measure(data, dataname,agentClassGenerator,params):
    
    X_POINTS        = params[scp.X_POINTS]
    STEP            = params[scp.STEP]
    NUM_FOLDS       = params[scp.NUM_FOLDS]
    CLASSIFY_TIME   = params[scp.CLASSIFY_TIME]
    LEARN_TIME      = params[scp.LEARN_TIME]
    SEED            = params[scp.SEED]
    
    num_features_arr =  [ i*STEP for i in range(1,X_POINTS +1) ]
    print "\n============= Learning Curve ==================="
    print "Evaluating", dataname
    print "num_features,", "accuracy%,"
     
    results=[]
    for num_features in num_features_arr:
        agentClass = agentClassGenerator(num_features)
        
        try:
            gc.disable()
            confusion = AgentAnalyzer().run_one(data, agentClass, CLASSIFY_TIME, LEARN_TIME, num_folds=NUM_FOLDS, seed=SEED)
            gc.enable()
            gc.collect()
            
            idf = s_common.idf(NUM_FOLDS, data, num_features)
            results.append( (num_features,confusion, idf) )
            print num_features, ',', confusion.getAccuracyStr()
        
        except Exception, e:
            print "Error:",e
            print " Possible Timeout for", num_features
Example #8
def scrape_links(delay=1, savelimit=100):

    gc.enable()

    movies, full_movies, count = load_json('links.json'), [], 1

    for movie in movies:

        if count < 4601:
            print count
            count += 1
            continue

        time.sleep(delay)

        full_data = scrape_movie_page(movie['link'])

        movie.update(full_data)

        full_movies.append(movie)

        print count, movie['name'], movie['year'], movie['revenue']

        count += 1

        if count % savelimit == 0:
            rank = [str(count-savelimit), '-', str(count)]
            path = 'data/movies' + ' '.join(rank) + '.json'
            save_json(full_movies, path)
            full_movies = []
            gc.collect()
            print '%s ranked movies saved' % ' '.join(rank)
Example #9
def _exitfunc(cls):
    # At shutdown invoke finalizers for which atexit is true.
    # This is called once all other non-daemonic threads have been
    # joined.
    reenable_gc = False
    try:
        if cls._registry:
            import gc
            if gc.isenabled():
                reenable_gc = True
                gc.disable()
            pending = None
            while True:
                if pending is None or finalize._dirty:
                    pending = cls._select_for_exit()
                    finalize._dirty = False
                if not pending:
                    break
                f = pending.pop()
                try:
                    # gc is disabled, so (assuming no daemonic
                    # threads) the following is the only line in
                    # this function which might trigger creation
                    # of a new finalizer
                    f()
                except Exception:
                    sys.excepthook(*sys.exc_info())
                assert f not in cls._registry
    finally:
        # prevent any more finalizers from executing during shutdown
        finalize._shutdown = True
        if reenable_gc:
            gc.enable()
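For context, `_exitfunc` is the shutdown hook behind `weakref.finalize`; a short illustration (assumed, not part of the example) of the kind of finalizer it processes at interpreter exit:

import weakref

class Resource:
    pass

def cleanup(name):
    print('cleaning up', name)

res = Resource()
fin = weakref.finalize(res, cleanup, 'resource-1')
fin.atexit = True   # the default; such finalizers are run by _exitfunc at shutdown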
Example #10
def main():
    from optparse import OptionParser
    import Zope
    gc.enable()
    app = Zope.app()
    parser = OptionParser()
    parser.add_option('-u', '--user', dest='username', default='admin')
    parser.add_option('-p', '--path', dest='path', default='')
    parser.add_option('-o', '--output', dest='output', default='')
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False)
    parser.add_option('-i', '--ignore', dest='ignored_types',
                      action='store', default=IGNORED_TYPES,
                      help="Provide comma separated List of Portal Types "
                      "to ignore")
    parser.add_option('-b', '--batch_size', dest='batch_size', default=0)
    parser.add_option('-s', '--batch_start', dest='batch_start', default=0)
    options, args = parser.parse_args()
    options.app = app
    if isinstance(options.ignored_types, basestring):
        options.ignored_types = options.ignored_types.split(',')
    options.batch_start = int(options.batch_start)
    options.batch_size = int(options.batch_size)
    export_site(app, options)
    transaction.commit()
  def testNoReferenceCyclesAfterCall(self):

    class ChildNetwork(network.Network):

      def __init__(self, name=None):
        super(ChildNetwork, self).__init__(name=name)

      def call(self, x):
        return x * 2.

    class ParentNetwork(network.Network):

      def __init__(self, name=None):
        super(ParentNetwork, self).__init__(name=name)
        self.l1 = self.track_layer(ChildNetwork())

      def call(self, x):
        return self.l1(x)

    one = constant_op.constant([[1.0]])
    gc.disable()
    gc.collect()
    previous_gc_debug_flags = gc.get_debug()
    gc.set_debug(gc.DEBUG_SAVEALL)
    preexisting = len(gc.garbage)
    net = ParentNetwork()
    net(one)
    del net
    gc.collect()
    # There should be no additional garbage requiring collection.
    self.assertEqual(preexisting, len(gc.garbage))
    gc.set_debug(previous_gc_debug_flags)
    gc.enable()
Example #12
def get_data(n, s, e, metadata=False):

    gc.enable() # turn on garbage collection
    pop, comments = tools.date_range_sample(n, s, e)
    print 'Loaded %d comments' % pop
    print 'Random sample of %d from date range' % n
    gc.collect()
    print 'Garbage Collection complete'

    features, metafeatures, labels, = [], [], []

    for c in comments:
        text = c['commentBody'] # text is the feature data
        features.append(text.encode('ascii','ignore'))
        labels.append(discretize_r(c['recommendationCount']))

        if metadata:
            c_sec = s_codes[c['section']]
            c_wc = discretize(int(c['wordcount']), wrdcnt)
            c_rnk = discretize(int(c['timeRank']), trnk)
            c_elp = discretize(int(c['elapsedTime']), eTime)
            c_pol = discretize(get_polarity(c['sentiment']), plrty)

            metafeatures.append([c_wc, c_rnk, c_elp, c_sec])

    print 'Extracted text (features) and class labels'
    if not metadata: return (features, labels)
    else: return (features, metafeatures, labels)
def reads_from_fastq_file(file_name, size_read_buffer = 10**8):
    fid = None
    if file_name == '-':
        fid = sys.stdin
    elif file_name.lower().endswith('.gz'):
        fid = gzip.open(file_name,'r')
    else:
        fid = open(file_name,'r')
    piece = [None,None,None,None]
    ij = 0
    while True:
        gc.disable()
        lines = fid.readlines(size_read_buffer)
        gc.enable()
        if not lines:
            break
        for line in lines:
            ij = ij + 1
            piece[ij-1] = line
            if ij == 4:
                bucket = (piece[0].rstrip('\r\n')[1:],
                          piece[1].rstrip('\r\n'),
                          piece[3].rstrip('\r\n'))
                yield bucket
                piece = [None,None,None,None]
                ij = 0
    fid.close()
Example #14
def reads_from_fastq_file(f_name,size_read_buffer=10**8):
    fid = None
    if f_name == '-':
        fid = sys.stdin
    elif f_name.lower().endswith('.gz'):
        fid = gzip.open(f_name,'r')
    else:
        fid = open(f_name,'r')
    j = 0
    p1 = None
    p2 = None
    while True:
        gc.disable()
        lines = fid.readlines(size_read_buffer)
        gc.enable()
        if not lines:
            break
        for a_line in lines:
            j = j + 1
            if j == 1:
                p1 = a_line
            elif j == 2:
                p2 = a_line
            elif j == 4:
                yield (p1,p2,a_line)
                p1 = None
                p2 = None
                j = 0
    fid.close()
def add_line(self, line):
    gc.disable()
    self.data.append(line)
    gc.enable()
    self.size = self.size + len(line)
    if self.size > self.size_buffer:
        self.__write_buffer()
Example #16
def main():
    for key_file in glob.glob("*.pem"):
        with open(key_file, "r") as f:
            private_key = f.read()

        # Check that the signatures match.
        results = []
        for name, func in get_signature.items():
            if not available[name]:
                continue
            results.append((name, func(private_key, "foo bar")))
        print "{} using {}:".format(key_file, ", ".join(r[0] for r in results))
        signatures = dict((r[1], True) for r in results).keys()
        if len(signatures) == 1:
            print "  EQUAL"
        else:
            print "  NOT EQUAL"

        # Simple benchmark.
        iters = 500
        s = get_random_string(500)
        for name, func in get_signature.items():
            if not available[name]:
                continue
            print "running {} iterations of {}".format(iters, name)
            gc.disable()
            tic = time.time()
            for i in range(iters):
                func(private_key, s)
            toc = time.time()
            gc.enable()
            print "  took {:.3f}s".format(toc - tic)

        print
Example #17
def _main():
    """
    main loop
    """
    user = None
    GAME_SELECT_DELAY = .4
    
    while True:
        gc.disable()
        hardware.reset()

        if user is None:
            user = persistence.get_anonymous()
            
        # do game selection by good/bad light
        hardware.write_message("Waiting for a game selection","  Choose 1 - %d" % len(games)).\
                display_characters('H','I')
            
        select = hardware.select_by_lights(len(games),9)
        if select == 9:
            for i in xrange(5):
                hardware.display_characters('B','Y')\
                        .wait(.3)\
                        .display_characters(' ',' ')\
                        .wait(.2)
            hardware.wait(1)\
                    .cleanup()
            exit()
            
        # game picked, construct it
        (name,description,levels,author,date,ver) = games[select-1].GameInfo()
        game = games[select-1]() 
        level = 1
        if levels > 1:
            hardware.display_characters('L','E')
            level = hardware.select_by_lights(levels,9)
        if level == 9:
            continue
        
        hardware.display_number(0)
        
        game.initialize(hardware,user,level)

        hardware.write_message("Playing game>",name)
        hardware.write_debug(description,'by',author)
        
        score = Score().load_at_start(name,ver,level,user)
        persistence.save_score_start(score,user)
        
        start = time.time()
        score.score = game.play()

        score.duration_sec = time.time() - start
        persistence.save_score_end(score,user)

        hardware.beep(2,.5)
        hardware.blink_light_until_button(5)
        
        gc.enable()
        gc.collect()
Example #18
def load(self, file):
    # "file" could also be a socket
    gc.disable()
    try:
        return pickle.load(file)
    finally:
        gc.enable()
Example #19
    def loadblk(self, blk, buf):
        # we are in a signal handler - establish a cycle which also references obj_4del and trigger a full GC
        assert self.obj_4del is not None
        w = weakref.ref(self.obj_4del)
        assert w() is self.obj_4del

        # establish cycle with leaf ref to obj_4del
        a = C()
        b = C()
        a.b = b
        b.a = a
        a.obj_4del = self.obj_4del

        self.obj_4del = None
        assert w() is not None

        # delete the a=b cycle - it should stay alive while gc is disabled
        gc_save = gc.isenabled()
        gc.disable()

        del a, b
        assert w() is not None

        # gc - a=b and obj_4del collected
        gc.collect()
        assert w() is None

        if gc_save:
            gc.enable()

        self.marker_list.append(2)
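`C` is not defined in this snippet; presumably it is just a trivial attribute container, e.g. (assumption):

class C(object):
    pass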
def std_filter(use_cols, nrows=5000, threshold=0.02):

    data1 = pd.read_csv('../output/tar/train_pre_agg_0-10000.csv', nrows=nrows, usecols=use_cols)
    data2 = pd.read_csv('../output/tar/train_pre_agg_10000-50000.csv', nrows=nrows, usecols=use_cols)
    data3 = pd.read_csv('../output/tar/train_pre_agg_6-10.csv', nrows=nrows, usecols=use_cols)
    data4 = pd.read_csv('../output/tar/train_pre_agg_10-20.csv', nrows=nrows, usecols=use_cols)
    data5 = pd.read_csv('../output/tar/train_pre_agg_20-30.csv', nrows=nrows, usecols=use_cols)
    data6 = pd.read_csv('../output/tar/train_pre_agg_30+.csv', nrows=nrows, usecols=use_cols)

    datas = [data1, data2, data3,
             data4, data5, data6]

    to_drop = []

    for data in datas:

        data_std = np.std(data, axis=0)

        cols_drop = data_std.index[data_std < threshold]
        print('\nstd < {}: {} features to drop'.format(threshold, len(cols_drop)))

        to_drop = list(set(set(cols_drop) | set(to_drop)))
        print('to drop length {}'.format(len(to_drop)))

    use_cols = [col for col in data1.columns if col not in to_drop]
    # pprint(use_cols)

    del data1, data2, data3, data4, data5, data6, datas
    gc.enable()

    return use_cols
def nan_filter(nrows=5000, missing_thread=0.9):

    data1 = pd.read_csv('../output/tar/train_pre_agg_0-10000.csv', nrows=nrows)
    data2 = pd.read_csv('../output/tar/train_pre_agg_10000-50000.csv', nrows=nrows)
    data3 = pd.read_csv('../output/tar/train_pre_agg_6-10.csv', nrows=nrows)
    data4 = pd.read_csv('../output/tar/train_pre_agg_10-20.csv', nrows=nrows)
    data5 = pd.read_csv('../output/tar/train_pre_agg_20-30.csv', nrows=nrows)
    data6 = pd.read_csv('../output/tar/train_pre_agg_30+.csv', nrows=nrows)

    datas = [data1, data2, data3,
             data4, data5, data6]

    to_drop = []

    for data in datas:

        data_missing = (data.isnull().sum() / len(data)).sort_values(ascending=False)

        data_missing = data_missing.index[data_missing > missing_thread]
        print('\nmissing feature > {} {}'.format(missing_thread, len(data_missing)))

        to_drop = list(set(set(data_missing) | set(to_drop)))
        print('to drop length {}'.format(len(to_drop)))


    use_cols = [col for col in data1.columns if col not in to_drop]
    # pprint(use_cols)

    del data1, data2, data3, data4, data5, data6, datas
    gc.enable()

    return use_cols
Example #22
def group_edges(cs):
    plus = []
    minus = []
    pairs = []
    gc.disable()
    interval = 1000
    for current, cl1 in enumerate(cs.clusters):
        if (current % interval) == 0:
            update_status(float(current) / len(cs.clusters), "Grouping all edges...")

        bib1 = tuple(cl1.bibs)[0]
        pointers = cl1.out_edges
        for bib2 in xrange(len(cl1.out_edges)):
            val = pointers[bib2]
            if val[0] not in Bib_matrix.special_numbers:
                if val[0] > edge_cut_prob:
                    pairs.append((bib1, bib2, val))
            elif val[0] == Bib_matrix.special_symbols['+']:
                plus.append((bib1, bib2))
            elif val[0] == Bib_matrix.special_symbols['-']:
                minus.append((bib1, bib2))
            else:
                assert val[0] == Bib_matrix.special_symbols[None], "Invalid Edge"

    update_status_final("Finished with the edge grouping.")

    bibauthor_print("Positive edges: %d, Negative edges: %d, Value edges: %d."
                     % (len(plus), len(minus), len(pairs)))
    gc.enable()
    return plus, minus, pairs
Example #23
def f(*k, **kw):
    try:
        gc.disable()
        ret = fun(*k, **kw)
    finally:
        gc.enable()
    return ret
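Note that this wrapper re-enables the collector unconditionally, even if it was already disabled before the call. A state-preserving variant, sketched with only the standard library (the names `gc_paused` and `without_gc` are made up here):

import functools
import gc
from contextlib import contextmanager

@contextmanager
def gc_paused():
    # disable GC for the duration of the block, then restore the previous state
    was_enabled = gc.isenabled()
    gc.disable()
    try:
        yield
    finally:
        if was_enabled:
            gc.enable()

def without_gc(fun):
    # decorator form of the wrapper above
    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        with gc_paused():
            return fun(*args, **kwargs)
    return wrapper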
Example #24
def checkMemory():
    """as the name says"""
    # pylint: disable=too-many-branches
    if not Debug.gc:
        return
    gc.set_threshold(0)
    gc.set_debug(gc.DEBUG_LEAK)
    gc.enable()
    print('collecting {{{')
    gc.collect()        # we want to eliminate all output
    print('}}} done')

    # code like this may help to find specific things
    if True: # pylint: disable=using-constant-test
        interesting = ('Client', 'Player', 'Game')
        for obj in gc.garbage:
            if hasattr(obj, 'cell_contents'):
                obj = obj.cell_contents
            if not any(x in repr(obj) for x in interesting):
                continue
            for referrer in gc.get_referrers(obj):
                if referrer is gc.garbage:
                    continue
                if hasattr(referrer, 'cell_contents'):
                    referrer = referrer.cell_contents
                if referrer.__class__.__name__ in interesting:
                    for referent in gc.get_referents(referrer):
                        print('%s refers to %s' % (referrer, referent))
                else:
                    print('referrer of %s/%s is: id=%s type=%s %s' %
                          (type(obj), obj, id(referrer),
                           type(referrer), referrer))
    print('unreachable:%s' % gc.collect())
    gc.set_debug(0)
Example #25
def timer(fxn, args):
    gc.disable()
    t1 = time.time()
    R = fxn(*args)
    t2 = time.time()
    gc.enable()
    return R, (t2 - t1)
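A possible (purely illustrative) use of this helper, assuming `time` and `gc` are imported in the same module:

# hypothetical usage: time a sort without GC pauses skewing the measurement
data = list(range(1000000, 0, -1))
result, elapsed = timer(sorted, (data,))
print('sorted %d items in %.3f seconds' % (len(result), elapsed))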
Example #26
def newfunc(*args, **kargs):
    try:
        gc.disable()
        result = func(*args, **kargs)
    finally:
        gc.enable()
    return result
  def next_seed(self):
    """
    Load next seed from disk
    """
    seed = next(self._all_seeds)
    folder = os.path.join(self._root, str(seed), self._subset)
    self.data = []
    silence = None

    gc.disable()

    for filename in os.listdir(folder):
      command = os.path.splitext(os.path.basename(filename))[0]
      with open(os.path.join(folder, filename), "r") as pkl_file:
        audio = pickle.load(pkl_file)

      # Check for 'silence'
      if command == "silence":
        silence = audio
      else:
        target = self.classes.index(os.path.basename(command))
        self.data.extend(itertools.product(audio, [target]))

    gc.enable()

    target = self.classes.index("silence")
    self.data += [(silence, target)] * int(len(self.data) * self._silence_percentage)
    return seed
Example #28
def compare_algos(fil):
    img = nb.load(fil)
    mask = img.get_data() > 0

    print('Parameter initialization')
    S = init_classical(img, mask)
    map0 = S.map()
    mu0 = S.mu.copy()
    sigma0 = S.sigma.copy()

    print('Running classical VEM')
    e0, f0 = run_ve(S, niters=NITERS)
    jac0 = jaccard(S.map(), map0)
    map0 = S.map()
    tmp = e0[-1][0] + e0[-1][1]
    print('Final energy: %f' % tmp)

    print('Running Laplace relaxed VEM')
    S = init_laplace(img, mask, mu0, sigma0)
    jac1 = jaccard(S.map(), map0)
    e, f = run_ve(S, niters=NITERS)
    jac2 = jaccard(S.map(), map0)
    tmp = e[-1][0] + e[-1][1]
    print('Final energy: %f' % tmp)

    del img
    del mask
    del S
    del map0
    gc.enable()
    gc.collect()

    return {'e0': e0, 'f0': f0, 'e': e, 'f': f,
            'jac0': jac0, 'jac1': jac1, 'jac2': jac2}
Example #29
	def grab_frame(self):
		now = time.time()
		right_local_frame = None
		left_local_frame = None
		try:
			for i in range (4):
				ret1, right_local_frame = self.right_cam.read()
			time.sleep(.10)
			for i in range (4):
				ret2, left_local_frame = self.left_cam.read()
		except:
			pass
		self.capture_time = (time.time()-now)
		print "capture time:", self.capture_time
		if self.capture_time > 3 or right_local_frame is None or left_local_frame is None:
			#time.sleep(1)
			print "camera fault: recovering...", self.recovery_count
			self.recovery_count += 1
			try:
				if self.right_cam is not None or self.left_cam is not None:
					self.right_cam.release()
					self.left_cam.release()
				gc.enable()
				gc.collect()			
				self.initialize_camera()
			except:
				#time.sleep(.1)
				pass
			self.grab_frame()

		else:
			self.frame_count += 1		
			self.right_frame = right_local_frame
			self.left_frame = left_local_frame
			print 'frame count:', self.frame_count
Example #30
	def run(self):
		self.connect()
		self.initialize_camera(self.camera_num, self.x, self.y)
		while True:
			time.sleep(0.0001) #dont hog resources
			self.frame = None
			self.frame_count += 1
			now = time.time()
			try:
				ret, self.frame = self.camera.read()
			except:
				pass
			self.capture_time = (time.time()-now)
			print 'frames:', self.frame_count , "   capture time:", self.capture_time, "   recovery_count:", self.recovery_count 
			if self.capture_time > 0.9 or self.frame is None:
				self.frame = None
				while self.frame is None:
					self.recovery_count += 1
					try:
						if self.camera is not None:
							self.camera.release()
						gc.enable()
						gc.collect()			
						self.initialize_camera(self.camera_num, self.x, self.y)
						try:
							ret, self.frame = self.camera.read()
						except:
							pass
					except:
						time.sleep(.5)
						pass
			pickled_frame = pickle.dumps(self.frame,-1)
			self.publish(pickled_frame)
Example #31
import gc


class Human:
    def __init__(self, name):
        self.name = name
        self.head = self.Head()
        self.brain = self.head.Brain()
        self.display()

    def display(self):
        print("hello ", self.name)
        self.head.talk()
        self.brain.think()

    class Head:
        def talk(self):
            print("talking")

        class Brain:
            def think(self):
                print("Thinking")


h = Human("anil")
#print(gc.isenabled())
print(gc.disable())
print(gc.enable())
print(gc.isenabled())
Example #32
    def __init__(self, fname=None, fdata=None, decompress=False,
                 decrypt=False, password='', disable_gc=True, verbose=True):
        self.private.verbose = verbose

        # Runs a lot faster with GC off.
        disable_gc = disable_gc and gc.isenabled()
        if disable_gc:
            gc.disable()

        try:
            if fname is not None:
                assert fdata is None
                # Allow reading preexisting streams like pyPdf
                if hasattr(fname, 'read'):
                    fdata = fname.read()
                else:
                    try:
                        f = open(fname, 'rb')
                        fdata = f.read()
                        f.close()
                    except IOError:
                        raise PdfParseError('Could not read PDF file %s' %
                                            fname)

            assert fdata is not None
            fdata = convert_load(fdata)

            if not fdata.startswith('%PDF-'):
                startloc = fdata.find('%PDF-')
                if startloc >= 0:
                    log.warning('PDF header not at beginning of file')
                else:
                    lines = fdata.lstrip().splitlines()
                    if not lines:
                        raise PdfParseError('Empty PDF file!')
                    raise PdfParseError('Invalid PDF header: %s' %
                                        repr(lines[0]))

            self.private.version = fdata[5:8]

            endloc = fdata.rfind('%EOF')
            if endloc < 0:
                raise PdfParseError('EOF mark not found: %s' %
                                    repr(fdata[-20:]))
            endloc += 6
            junk = fdata[endloc:]
            fdata = fdata[:endloc]
            if junk.rstrip('\00').strip():
                log.warning('Extra data at end of file')

            private = self.private
            private.indirect_objects = {}
            private.deferred_objects = set()
            private.special = {'<<': self.readdict,
                               '[': self.readarray,
                               'endobj': self.empty_obj,
                               }
            for tok in r'\ ( ) < > { } ] >> %'.split():
                self.special[tok] = self.badtoken

            startloc, source = self.findxref(fdata)
            private.source = source

            # Find all the xref tables/streams, and
            # then deal with them backwards.
            xref_list = []
            while 1:
                source.obj_offsets = {}
                trailer, is_stream = self.parsexref(source)
                prev = trailer.Prev
                if prev is None:
                    token = source.next()
                    if token != 'startxref' and not xref_list:
                        source.warning('Expected "startxref" '
                                       'at end of xref table')
                    break
                xref_list.append((source.obj_offsets, trailer, is_stream))
                source.floc = int(prev)

            # Handle document encryption
            private.crypt_filters = None
            if decrypt and PdfName.Encrypt in trailer:
                identity_filter = crypt.IdentityCryptFilter()
                crypt_filters = {
                    PdfName.Identity: identity_filter
                }
                private.crypt_filters = crypt_filters
                private.stream_crypt_filter = identity_filter
                private.string_crypt_filter = identity_filter

                if not crypt.HAS_CRYPTO:
                    raise PdfParseError(
                        'Install PyCrypto to enable encryption support')

                self._parse_encrypt_info(source, password, trailer)

            if is_stream:
                self.load_stream_objects(trailer.object_streams)

            while xref_list:
                later_offsets, later_trailer, is_stream = xref_list.pop()
                source.obj_offsets.update(later_offsets)
                if is_stream:
                    trailer.update(later_trailer)
                    self.load_stream_objects(later_trailer.object_streams)
                else:
                    trailer = later_trailer

            trailer.Prev = None

            if (trailer.Version and
                    float(trailer.Version) > float(self.version)):
                self.private.version = trailer.Version

            if decrypt:
                self.decrypt_all()
                trailer.Encrypt = None

            if is_stream:
                self.Root = trailer.Root
                self.Info = trailer.Info
                self.ID = trailer.ID
                self.Size = trailer.Size
                self.Encrypt = trailer.Encrypt
            else:
                self.update(trailer)

            # self.read_all_indirect(source)
            private.pages = self.readpages(self.Root)
            if decompress:
                self.uncompress()

            # For compatibility with pyPdf
            private.numPages = len(self.pages)
        finally:
            if disable_gc:
                gc.enable()
Example #33
def setUp(self):
    gc.enable()
Example #34
def split_reads(f_in,
                f_list,
                f_out_1,
                f_out_2,
                wiggle=0,
                gap=0,
                anchor=15,
                anchor_max=500,
                replace_solexa_ids="",
                rc=False,
                size_buffer=2 * (10**9)):

    data1 = lines_to_file(f_out_1)
    data2 = lines_to_file(f_out_2)

    fid = open(f_list, 'r')
    reads = []
    while True:

        p = fid.tell()
        err = True
        sb = size_buffer
        if sb == 0:
            sb = 2 * (10**9)
        while err:
            gc.disable()
            try:
                lines = fid.readlines(sb)
            except MemoryError:
                print >> sys.stderr, "Warning: Not enough free memory (it needed %d)!!! Trying again with a 50% smaller buffer..." % (
                    sb, )
                sb = int(sb / 2)
                if sb < 10000000:
                    print >> sys.stderr, "Error: Not enough free memory (it needed %d)!!! Giving up..." % (
                        sb, )
                    os.system("free -m")
                    sys.exit(1)
                err = True
                fid.seek(p)
            else:
                err = False
            gc.enable()

        if not lines:
            break

        gc.disable()
        reads = [line.rstrip('\r\n').partition("\t") for line in lines]
        gc.enable()

        gc.disable()
        r = dict()
        wiggle_range = range(-wiggle, wiggle + 1)

        for line in reads:
            k = line[0]
            if not r.has_key(k):
                r[k] = set()
            w = int(line[2])
            for wig in wiggle_range:
                r[k].add(w + wig)
        reads = r
        gc.enable()
        am1 = anchor - 1
        am2 = anchor - 2

        for read in reads_from_fastq_file(f_in):
            v = reads.get(read[0][1:].rstrip('\r\n'), None)
            if not v:
                continue
            v = list(v)
            i = 0
            unique = set()
            if gap != 0:
                for agap in xrange(1, gap + 1):
                    for cut in v:

                        if cut + 1 - agap > anchor - 1:
                            k1 = cut + 1 - agap
                            k2 = cut + 1
                            if (k1, k2) not in unique:
                                w = givemeid(replace_solexa_ids, read[0][:-1],
                                             i)
                                #                                if replace_solexa_ids:
                                #                                    w = read[0][:-1].replace("/",replace_solexa_ids,1)+'__'+int2str(i)
                                #                                else:
                                #                                    w = read[0][:-1]+'__'+int2str(i)
                                r1a = read[1][0:cut + 1 - agap]
                                r2a = read[2][0:cut + 1 - agap]
                                r1b = read[1][cut + 1:]
                                r2b = read[2][cut + 1:]
                                lr1a = len(r1a)
                                lr1b = len(r1b)
                                if lr1a > am1 and lr1b > am2:
                                    data1.add_line("%sa\n%s\n+\n%s\n" %
                                                   (w, r1a, r2a))
                                    if rc:
                                        data2.add_line(
                                            "%sb\n%s\n+\n%s\n" %
                                            (w, reversecomplement(r1b),
                                             reverse(r2b)))
                                    else:
                                        data2.add_line("%sb\n%s+\n%s" %
                                                       (w, r1b, r2b))
                                    i = i + 1
                                    unique.add((k1, k2))

                                    flag = True  # trim only one end and not both ends
                                    if lr1a > anchor_max:
                                        r1a = r1a[-anchor_max:]
                                        r2a = r2a[-anchor_max:]
                                        flag = False
                                    if lr1b > anchor_max and flag:
                                        r1b = r1b[:anchor_max]
                                        r2b = r2b[:anchor_max]
                                        flag = False
                                    if flag == False:
                                        w = givemeid(replace_solexa_ids,
                                                     read[0][:-1], i)
                                        data1.add_line("%sa\n%s\n+\n%s\n" %
                                                       (w, r1a, r2a))
                                        if rc:
                                            data2.add_line(
                                                "%sb\n%s\n+\n%s\n" %
                                                (w, reversecomplement(r1b),
                                                 reverse(r2b)))
                                        else:
                                            data2.add_line("%sb\n%s+\n%s" %
                                                           (w, r1b, r2b))
                                        i = i + 1

                        if len(read[1]) - (cut + 1 + agap) > anchor - 1:
                            k1 = cut + 1
                            k2 = cut + 1 + agap
                            if (k1, k2) not in unique:
                                w = givemeid(replace_solexa_ids, read[0][:-1],
                                             i)
                                #                                if replace_solexa_ids:
                                #                                    w = read[0][:-1].replace("/",replace_solexa_ids,1)+'__'+int2str(i)
                                #                                else:
                                #                                    w = read[0][:-1]+'__'+int2str(i)
                                r1a = read[1][0:cut + 1]
                                r2a = read[2][0:cut + 1]
                                r1b = read[1][cut + 1 + agap:]
                                r2b = read[2][cut + 1 + agap:]
                                lr1a = len(r1a)
                                lr1b = len(r1b)
                                if lr1a > am1 and lr1b > am2:
                                    data1.add_line("%sa\n%s\n+\n%s\n" %
                                                   (w, r1a, r2a))
                                    if rc:
                                        data2.add_line(
                                            "%sb\n%s\n+\n%s\n" %
                                            (w, reversecomplement(r1b),
                                             reverse(r2b)))
                                    else:
                                        data2.add_line("%sb\n%s+\n%s" %
                                                       (w, r1b, r2b))
                                    i = i + 1
                                    unique.add((k1, k2))

                                    flag = True  # trim only one end and not both ends
                                    if lr1a > anchor_max:
                                        r1a = r1a[-anchor_max:]
                                        r2a = r2a[-anchor_max:]
                                        flag = False
                                    if lr1b > anchor_max and flag:
                                        r1b = r1b[:anchor_max]
                                        r2b = r2b[:anchor_max]
                                        flag = False
                                    if flag == False:
                                        w = givemeid(replace_solexa_ids,
                                                     read[0][:-1], i)
                                        data1.add_line("%sa\n%s\n+\n%s\n" %
                                                       (w, r1a, r2a))
                                        if rc:
                                            data2.add_line(
                                                "%sb\n%s\n+\n%s\n" %
                                                (w, reversecomplement(r1b),
                                                 reverse(r2b)))
                                        else:
                                            data2.add_line("%sb\n%s+\n%s" %
                                                           (w, r1b, r2b))
                                        i = i + 1

            else:
                for cut in v:
                    w = givemeid(replace_solexa_ids, read[0][:-1], i)
                    #                    if replace_solexa_ids:
                    #                        w = read[0][:-1].replace("/",replace_solexa_ids,1)+'__'+int2str(i)
                    #                    else:
                    #                        w = read[0][:-1]+'__'+int2str(i)
                    r1a = read[1][0:cut + 1]
                    r2a = read[2][0:cut + 1]
                    r1b = read[1][cut + 1:]
                    r2b = read[2][cut + 1:]
                    lr1a = len(r1a)
                    lr1b = len(r1b)
                    if lr1a > am1 and lr1b > am2:
                        data1.add_line("%sa\n%s\n+\n%s\n" % (w, r1a, r2a))
                        if rc:
                            data2.add_line(
                                "%sb\n%s\n+\n%s\n" %
                                (w, reversecomplement(r1b), reverse(r2b)))
                        else:
                            data2.add_line("%sb\n%s+\n%s" % (w, r1b, r2b))
                        i = i + 1

                        flag = True  # trim only one end and not both ends
                        if lr1a > anchor_max:
                            r1a = r1a[-anchor_max:]
                            r2a = r2a[-anchor_max:]
                            flag = False
                        if lr1b > anchor_max and flag:
                            r1b = r1b[:anchor_max]
                            r2b = r2b[:anchor_max]
                            flag = False

                        if flag == False:
                            w = givemeid(replace_solexa_ids, read[0][:-1], i)
                            data1.add_line("%sa\n%s\n+\n%s\n" % (w, r1a, r2a))
                            if rc:
                                data2.add_line(
                                    "%sb\n%s\n+\n%s\n" %
                                    (w, reversecomplement(r1b), reverse(r2b)))
                            else:
                                data2.add_line("%sb\n%s+\n%s" % (w, r1b, r2b))
                            i = i + 1
    data1.close()
    data2.close()
    fid.close()
Example #35
def test_enable(self):
    gc.enable()
    result = gc.isenabled()
    self.assertTrue(result, "gc.enable() did not set gc.isenabled() to True.")
def Network_config(class_num=4, epoch=200, initial_epoch=0, batch_size=32,
                     train_data=None, train_label=None,
                     test_data=None, test_label=None, fold=0):
    adam = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.000)
    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    K.set_learning_phase(1)
    base_model = InceptionV3(input_tensor=Input(shape=(299, 299, 3)), weights='imagenet', include_top=False)

    x = base_model.output
    # K.set_learning_phase(1)
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    predictions = Dense(class_num, activation='softmax')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in (base_model.layers):
        layer.trainable = False
        if layer.name.startswith('bn') or 'bn' in layer.name:
            layer.call(layer.input, training=False)

    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=[keras.metrics.categorical_accuracy])


    tools.create_directory('./tmpinception/')
    weights_file = './tmpinception/' + str(fold)+'-weights.{epoch:02d}-{categorical_accuracy:.4f}-{val_loss:.4f}-{val_categorical_accuracy:.4f}.h5'
    csv_file = './tmpinception/record.csv'
    lr_reducer = ReduceLROnPlateau(monitor='categorical_accuracy', factor=0.5,
                                   cooldown=0, patience=5, min_lr=0.5e-6)
    early_stopper = EarlyStopping(monitor='val_categorical_accuracy', min_delta=1e-4, patience=50)

    model_checkpoint = ModelCheckpoint(weights_file, monitor='val_categorical_accuracy', save_best_only=True,
                                       verbose=2,
                                       save_weights_only=True, mode='max')
    tensorboard = TensorBoard(log_dir='./logs/', histogram_freq=0, batch_size=8, write_graph=True,
                              write_grads=True, write_images=True, embeddings_freq=0, embeddings_layer_names=None,
                              embeddings_metadata=None)
    CSV_record = CSVLogger(csv_file, separator=',', append=True)

    callbacks = [lr_reducer, early_stopper, model_checkpoint, tensorboard, CSV_record]
    gc.disable()
    model.fit_generator(
        generator=tools.batch_generator(np.array(train_data), np.array(train_label), batch_size, True, class_num),
        steps_per_epoch=int(len(train_label)/batch_size)-1,
        max_q_size=20,
        initial_epoch=initial_epoch,
        epochs=epoch,
        verbose=1,
        callbacks=callbacks,
        validation_data=tools.batch_generator(np.array(test_data), np.array(test_label), batch_size, True, class_num),
        validation_steps=int(len(test_label)/batch_size)-1,
        class_weight='auto')


    all_y_pred = []
    all_y_true = []
    for test_data_batch, test_label_batch in tools.batch_generator_confusion_matrix(np.array(test_data),np.array(test_label), batch_size, True, class_num):
        y_pred = model.predict(test_data_batch, batch_size)
        y_true = test_label_batch
        for y_p in y_pred:
            all_y_pred.append(np.where(y_p == max(y_p))[0][0])

        for y_t in y_true:
            all_y_true.append(np.where(y_t == max(y_t))[0][0])
    confusion = confusion_matrix(y_true=all_y_true,y_pred=all_y_pred)
    print(confusion)
    f = open('confusion_matrix.txt','a+')
    f.write(str(all_y_true)+"\n")
    f.write(str(all_y_pred)+"\n")
    f.write(str(confusion)+'\n')
    f.close()
    gc.enable()
Example #37
def run_command(command=None,
                parser=None,
                args=None,
                name='unknown',
                data=None,
                options=None):
    """
    Execute a function that processes command-line arguments and
    then calls a command-line driver.

    This function provides a generic facility for executing a command
    function.  It is segregated from the driver to enable profiling of
    the command-line execution.

    Required:
        command:    The name of a function that will be executed to process the command-line
                    options with a parser object.
        parser:     The parser object that is used by the command-line function.

    Optional:
        options:    If this is not None, then ignore the args option and use
                    this to specify command options.
        args:       Command-line arguments that are parsed.  If this value is `None`, then the
                    arguments in `sys.argv` are used to parse the command-line.
        name:       The name of the command-line (for error messages).
        data:       A container of labeled data.

    Returned:
        retval:     Return values from the command-line execution.
        errorcode:  0 if Pyomo ran successfully
    """
    #
    #
    # Parse command-line options
    #
    #
    retval = None
    errorcode = 0
    if options is None:
        try:
            if type(args) is argparse.Namespace:
                _options = args
            else:
                _options = parser.parse_args(args=args)
            # Replace the parser options object with a pyutilib.misc.Options object
            options = pyutilib.misc.Options()
            for key in dir(_options):
                if key[0] != '_':
                    val = getattr(_options, key)
                    if not isinstance(val, types.MethodType):
                        options[key] = val
        except SystemExit:
            # the parser throws a system exit if "-h" is specified - catch
            # it to exit gracefully.
            return Container(retval=retval, errorcode=errorcode)
    #
    # Configure loggers
    #
    configure_loggers(options=options)
    #
    # Call the main Pyomo runner with profiling
    #
    TempfileManager.push()
    pcount = options.runtime.profile_count
    if pcount > 0:
        # Defer import of profiling packages until we know that they
        # are needed
        try:
            try:
                import cProfile as profile
            except ImportError:
                import profile
            import pstats
        except ImportError:
            configure_loggers(shutdown=True)
            raise ValueError(
                "Cannot use the 'profile' option: the Python "
                "'profile' or 'pstats' package cannot be imported!")
        tfile = TempfileManager.create_tempfile(suffix=".profile")
        tmp = profile.runctx(
            command.__name__ + '(options=options,parser=parser)',
            command.__globals__, locals(), tfile)
        p = pstats.Stats(tfile).strip_dirs()
        p.sort_stats('time', 'cumulative')
        p = p.print_stats(pcount)
        p.print_callers(pcount)
        p.print_callees(pcount)
        p = p.sort_stats('cumulative', 'calls')
        p.print_stats(pcount)
        p.print_callers(pcount)
        p.print_callees(pcount)
        p = p.sort_stats('calls')
        p.print_stats(pcount)
        p.print_callers(pcount)
        p.print_callees(pcount)
        retval = tmp
    else:
        #
        # Call the main Pyomo runner without profiling
        #
        TempfileManager.push()
        try:
            retval = command(options=options, parser=parser)
        except SystemExit:
            err = sys.exc_info()[1]
            #
            # If debugging is enabled or the 'catch' option is specified, then
            # exit.  Otherwise, print an "Exiting..." message.
            #
            if __debug__ and (options.runtime.logging == 'debug'
                              or options.runtime.catch_errors):
                configure_loggers(shutdown=True)
                sys.exit(0)
            print('Exiting %s: %s' % (name, str(err)))
            errorcode = err.code
        except Exception:
            err = sys.exc_info()[1]
            #
            # If debugging is enabled or the 'catch' option is specified, then
            # pass the exception up the chain (to pyomo_excepthook)
            #
            if __debug__ and (options.runtime.logging == 'debug'
                              or options.runtime.catch_errors):
                configure_loggers(shutdown=True)
                TempfileManager.pop(remove=not options.runtime.keep_files)
                raise

            if options.model is not None and options.model.save_file is not None:
                model = "model " + options.model.save_file
            else:
                model = "model"

            global filter_excepthook
            if filter_excepthook:
                action = "loading"
            else:
                action = "running"

            msg = "Unexpected exception while %s %s:\n    " % (action, model)
            #
            # This handles the case where the error is propagated by a KeyError.
            # KeyError likes to pass raw strings that don't handle newlines
            # (they translate "\n" to "\\n"), as well as tacking on single
            # quotes at either end of the error message. This undoes all that.
            #
            errStr = str(err)
            if type(err) == KeyError and errStr != "None":
                errStr = str(err).replace(r"\n", "\n")[1:-1]

            logger.error(msg + errStr)
            errorcode = 1

    configure_loggers(shutdown=True)

    if options.runtime.disable_gc:
        gc.enable()
    TempfileManager.pop(remove=not options.runtime.keep_files)
    return Container(retval=retval, errorcode=errorcode)
Example #38
def tearDown(self):
    self.graph.close()
    if self.gcold:
        gc.enable()
    # TODO: delete a_tmp_dir
    self.graph.close()
        def _execute_child(self, args, executable, preexec_fn, close_fds, cwd, env, universal_newlines, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite):
            """Execute program (POSIX version)"""
            if isinstance(args, types.StringTypes):
                args = [args]
            else:
                args = list(args)
            if shell:
                args = ['/bin/sh', '-c'] + args
                if executable:
                    args[0] = executable
            if executable is None:
                executable = args[0]
            errpipe_read, errpipe_write = os.pipe()
            try:
                try:
                    self._set_cloexec_flag(errpipe_write)
                    gc_was_enabled = gc.isenabled()
                    gc.disable()
                    try:
                        self.pid = os.fork()
                    except:
                        if gc_was_enabled:
                            gc.enable()
                        raise

                    self._child_created = True
                    if self.pid == 0:
                        try:
                            if p2cwrite is not None:
                                os.close(p2cwrite)
                            if c2pread is not None:
                                os.close(c2pread)
                            if errread is not None:
                                os.close(errread)
                            os.close(errpipe_read)
                            if p2cread is not None:
                                os.dup2(p2cread, 0)
                            if c2pwrite is not None:
                                os.dup2(c2pwrite, 1)
                            if errwrite is not None:
                                os.dup2(errwrite, 2)
                            if p2cread is not None and p2cread not in (0,):
                                os.close(p2cread)
                            if c2pwrite is not None and c2pwrite not in (p2cread, 1):
                                os.close(c2pwrite)
                            if errwrite is not None and errwrite not in (p2cread, c2pwrite, 2):
                                os.close(errwrite)
                            if close_fds:
                                self._close_fds(but=errpipe_write)
                            if cwd is not None:
                                os.chdir(cwd)
                            if preexec_fn:
                                preexec_fn()
                            if env is None:
                                os.execvp(executable, args)
                            else:
                                os.execvpe(executable, args, env)
                        except:
                            exc_type, exc_value, tb = sys.exc_info()
                            exc_lines = traceback.format_exception(exc_type, exc_value, tb)
                            exc_value.child_traceback = ''.join(exc_lines)
                            os.write(errpipe_write, pickle.dumps(exc_value))

                        os._exit(255)
                    if gc_was_enabled:
                        gc.enable()
                finally:
                    os.close(errpipe_write)

                if p2cread is not None and p2cwrite is not None:
                    os.close(p2cread)
                if c2pwrite is not None and c2pread is not None:
                    os.close(c2pwrite)
                if errwrite is not None and errread is not None:
                    os.close(errwrite)
                data = _eintr_retry_call(os.read, errpipe_read, 1048576)
            finally:
                os.close(errpipe_read)

            if data != '':
                _eintr_retry_call(os.waitpid, self.pid, 0)
                child_exception = pickle.loads(data)
                for fd in (p2cwrite, c2pread, errread):
                    if fd is not None:
                        os.close(fd)

                raise child_exception
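# A minimal standalone sketch of the error-pipe pattern used above: the child pickles
# any exception raised before or during exec() into a pipe, and the parent reads the
# pipe and re-raises it. Names are hypothetical; the sketch relies on os.pipe()
# descriptors being non-inheritable (Python 3.4+), so a successful exec() closes the
# write end and the parent's read() returns b''.
import os
import pickle


def spawn_or_raise(argv):
    err_r, err_w = os.pipe()
    pid = os.fork()
    if pid == 0:  # child
        os.close(err_r)
        try:
            os.execvp(argv[0], argv)  # on success, err_w is closed by exec
        except Exception as exc:
            os.write(err_w, pickle.dumps(exc))
        os._exit(255)
    # parent
    os.close(err_w)
    data = os.read(err_r, 1 << 20)
    os.close(err_r)
    if data:
        os.waitpid(pid, 0)           # reap the failed child
        raise pickle.loads(data)     # re-raise the child's exception
    return pid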
Beispiel #40
0
def test_global_gc_when_full(shutdown_only):
    cluster = ray.cluster_utils.Cluster()
    for _ in range(2):
        cluster.add_node(num_cpus=1,
                         num_gpus=0,
                         object_store_memory=100 * 1024 * 1024)
    ray.init(address=cluster.address)

    class LargeObjectWithCyclicRef:
        def __init__(self):
            self.loop = self
            self.large_object = ray.put(
                np.zeros(40 * 1024 * 1024, dtype=np.uint8))

    @ray.remote(num_cpus=1)
    class GarbageHolder:
        def __init__(self):
            gc.disable()
            x = LargeObjectWithCyclicRef()
            self.garbage = weakref.ref(x)

        def has_garbage(self):
            return self.garbage() is not None

        def return_large_array(self):
            return np.zeros(80 * 1024 * 1024, dtype=np.uint8)

    try:
        gc.disable()

        # Local driver.
        local_ref = weakref.ref(LargeObjectWithCyclicRef())

        # Remote workers.
        actors = [GarbageHolder.remote() for _ in range(2)]
        assert local_ref() is not None
        assert all(ray.get([a.has_garbage.remote() for a in actors]))

        # GC should be triggered for all workers, including the local driver,
        # when the driver tries to ray.put a value that doesn't fit in the
        # object store. This should cause the captured ObjectRefs' numpy arrays
        # to be evicted.
        ray.put(np.zeros(80 * 1024 * 1024, dtype=np.uint8))

        def check_refs_gced():
            return (local_ref() is None and
                    not any(ray.get([a.has_garbage.remote() for a in actors])))

        wait_for_condition(check_refs_gced)

        # Local driver.
        local_ref = weakref.ref(LargeObjectWithCyclicRef())

        # Remote workers.
        actors = [GarbageHolder.remote() for _ in range(2)]
        assert all(ray.get([a.has_garbage.remote() for a in actors]))

        # GC should be triggered for all workers, including the local driver,
        # when a remote task tries to put a return value that doesn't fit in
        # the object store. This should cause the captured ObjectRefs' numpy
        # arrays to be evicted.
        ray.get(actors[0].return_large_array.remote())

        def check_refs_gced():
            return (local_ref() is None and
                    not any(ray.get([a.has_garbage.remote() for a in actors])))

        wait_for_condition(check_refs_gced)
    finally:
        gc.enable()
Beispiel #41
0
            from setuptools_scm.git import parse
            kwargs['describe_command'] = \
                "git describe --dirty --tags --long --match 'apache-arrow-[0-9].*'"
            return parse(root, **kwargs)

        __version__ = setuptools_scm.get_version('../', parse=parse_git)
    except ImportError:
        __version__ = None

# ARROW-8684: Disable GC while initializing Cython extension module,
# to workaround Cython bug in https://github.com/cython/cython/issues/3603
_gc_enabled = _gc.isenabled()
_gc.disable()
import pyarrow.lib as _lib
if _gc_enabled:
    _gc.enable()

from pyarrow.lib import (BuildInfo, RuntimeInfo, VersionInfo, cpp_build_info,
                         cpp_version, cpp_version_info, runtime_info,
                         cpu_count, set_cpu_count, enable_signal_handlers,
                         io_thread_count, set_io_thread_count)


def show_versions():
    """
    Print various version information, to help with error reporting.
    """
    # TODO: CPU information and flags
    print("pyarrow version info\n--------------------")
    print("Package kind: {}".format(cpp_build_info.package_kind if len(
        cpp_build_info.package_kind) > 0 else "not indicated"))
Beispiel #42
0
    def identify_zero_importance(self,
                                 task,
                                 eval_metric=None,
                                 n_iterations=10,
                                 early_stopping=True):
        """
        
        Identify the features with zero importance according to a gradient boosting machine.
        The gbm can be trained with early stopping using a validation set to prevent overfitting. 
        The feature importances are averaged over `n_iterations` to reduce variance. 
        
        Uses the LightGBM implementation (http://lightgbm.readthedocs.io/en/latest/index.html)

        Parameters 
        --------

        eval_metric : string
            Evaluation metric to use for the gradient boosting machine for early stopping. Must be
            provided if `early_stopping` is True

        task : string
            The machine learning task, either 'classification' or 'regression'

        n_iterations : int, default = 10
            Number of iterations to train the gradient boosting machine
            
        early_stopping : boolean, default = True
            Whether or not to use early stopping with a validation set when training
        
        
        Notes
        --------
        
        - Features are one-hot encoded to handle the categorical variables before training.
        - The gbm is not optimized for any particular task and might need some hyperparameter tuning
        - Feature importances, including zero importance features, can change across runs

        """

        if early_stopping and eval_metric is None:
            raise ValueError(
                """eval metric must be provided with early stopping. Examples include "auc" for classification or
                             "l2" for regression.""")

        if self.labels is None:
            raise ValueError("No training labels provided.")

        # One hot encoding
        features = pd.get_dummies(self.data)
        self.one_hot_features = [
            column for column in features.columns
            if column not in self.base_features
        ]

        # Add one hot encoded data to original data
        self.data_all = pd.concat([features[self.one_hot_features], self.data],
                                  axis=1)

        # Extract feature names
        feature_names = list(features.columns)

        # Convert to np array
        features = np.array(features)
        labels = np.array(self.labels).reshape((-1, ))

        # Empty array for feature importances
        feature_importance_values = np.zeros(len(feature_names))

        print('Training Gradient Boosting Model\n')

        # Iterate through each fold
        for _ in range(n_iterations):

            if task == 'classification':
                model = lgb.LGBMClassifier(n_estimators=1000,
                                           learning_rate=0.05,
                                           verbose=0)

            elif task == 'regression':
                model = lgb.LGBMRegressor(n_estimators=2000,
                                          learning_rate=0.05,
                                          verbose=0)

            else:
                raise ValueError(
                    'Task must be either "classification" or "regression"')

            # If training using early stopping need a validation set
            if early_stopping:

                train_features, valid_features, train_labels, valid_labels = train_test_split(
                    features, labels, test_size=0.3)

                # Train the model with early stopping
                model.fit(train_features,
                          train_labels,
                          eval_metric=eval_metric,
                          eval_set=[(valid_features, valid_labels)],
                          early_stopping_rounds=100,
                          verbose=-1)

                # Clean up memory
                gc.enable()
                del train_features, train_labels, valid_features, valid_labels
                gc.collect()

            else:
                model.fit(features, labels)

            # Record the feature importances
            feature_importance_values += model.feature_importances_ / n_iterations

        feature_importances = pd.DataFrame({
            'feature':
            feature_names,
            'importance':
            feature_importance_values
        })

        # Sort features according to importance
        feature_importances = feature_importances.sort_values(
            'importance', ascending=False).reset_index(drop=True)

        # Normalize the feature importances to add up to one
        feature_importances['normalized_importance'] = feature_importances[
            'importance'] / feature_importances['importance'].sum()
        feature_importances['cumulative_importance'] = np.cumsum(
            feature_importances['normalized_importance'])

        # Extract the features with zero importance
        record_zero_importance = feature_importances[
            feature_importances['importance'] == 0.0]

        to_drop = list(record_zero_importance['feature'])

        self.feature_importances = feature_importances
        self.record_zero_importance = record_zero_importance
        self.ops['zero_importance'] = to_drop

        print('\n%d features with zero importance after one-hot encoding.\n' %
              len(self.ops['zero_importance']))
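        # Hypothetical usage sketch (class and attribute names assumed, not confirmed
        # by this snippet), in the spirit of a feature-selector-style API:
        #   fs = FeatureSelector(data=train_df, labels=train_labels)
        #   fs.identify_zero_importance(task='classification', eval_metric='auc',
        #                               n_iterations=10, early_stopping=True)
        #   zero_importance_features = fs.ops['zero_importance']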
Beispiel #43
0
def train_model():
    print('innn')
    import tensorflow as tf
    from tensorflow import keras
    from PIL import Image
    import numpy as np
    import gc

    global log
    print(request.json['folder'])
    print(request.json['lr'])
    print(request.json['epochs'])
    folder_name = request.json['folder']
    log = r"Initializing variables...."
    gc.enable()
    base_dir = os.path.join(app.config['UPLOAD_FOLDER'],folder_name)
    train_dir = os.path.join(base_dir, 'train')
    validation_dir = os.path.join(base_dir, 'validation')
    image_size = 160
    batch_size = 32

    if(request.json['epochs']):
        epochs = int(request.json['epochs'])
    else:
        epochs = 1
    if(request.json['lr']):
        lr = float(request.json['lr'])
    else:
        lr = 0.001


    train_datagen = keras.preprocessing.image.ImageDataGenerator(
                    rescale=1./255)

    validation_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
                    train_dir,
                    target_size=(image_size, image_size),
                    batch_size=batch_size,
                    class_mode='sparse')

    validation_generator = validation_datagen.flow_from_directory(
                    validation_dir, 
                    target_size=(image_size, image_size),
                    batch_size=batch_size,
                    class_mode='sparse')
    IMG_SHAPE = (image_size, image_size, 3)
    log = r'Creating Model Base to Train'
    base_model = keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet',classes=2)
    base_model.trainable = False
    model = keras.Sequential([base_model,
                            keras.layers.GlobalAveragePooling2D(),
                            keras.layers.Dense(2,activation='sigmoid')])
    log = r'Compiling the Model'
    model.compile(optimizer=keras.optimizers.RMSprop(lr=lr),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
    
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print("training started")
    log = r"Training the Model."
    model.fit_generator(train_generator,
                                steps_per_epoch = steps_per_epoch,
                                epochs=epochs,
                                workers=4,
                                validation_data=validation_generator,
                                validation_steps=validation_steps)


    log =  r"Dense Layer's Trainig done..."
    fine_tune_at = 100

    # Freeze all the layers before the `fine_tune_at` layer
    for layer in base_model.layers[:fine_tune_at]:
        layer.trainable =  False
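    # Note: base_model.trainable is still False from the first training phase, so this
    # loop is a no-op as written; the usual Keras fine-tuning recipe sets
    # base_model.trainable = True before re-freezing base_model.layers[:fine_tune_at].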

    model.compile(optimizer = tf.keras.optimizers.RMSprop(lr=2e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

    
    log = r'Fine Tuning the model, beginning to train.'
    hist = model.fit_generator(train_generator,
                                   steps_per_epoch = steps_per_epoch,
                                   epochs=epochs,
                                   workers=4,
                                   validation_data=validation_generator,
                                   validation_steps=validation_steps)

    
    log = r'Model is done training,Acc:{}, Loss:{}, Val_acc:{}, Val_loss:{}'.format(hist.history['acc'],hist.history['loss'],hist.history['val_acc'],hist.history['val_loss'])
    model_path = base_dir + '_model_without_fine_tune.h5'
    model.save(model_path)
    # gc.collect()
    print(model_path)
    print(folder_name + '_model_without_fine_tune.h5')
    return jsonify({"success":True,"modelLink": folder_name + '_model_without_fine_tune.h5'})
Beispiel #44
0
def sra2illumina(input_file,
                 output_file,
                 tag_read = None,
                 tag='',
                 phred_conversion = False,
                 operation = 'change',
                 tmp_dir = None,
                 size_read_buffer = 10**8):
    """
    It converts the FASTQ file (PHRED-33 qualities and SRA read names) downloaded
    from Short Read Archive (SRA) to Illumina FASTQ file (PHRED-64 Illumina v1.5
    and Illumina read names).
    """
    temp_file = None
    if phred_conversion:
        temp_file = give_me_temp_filename(tmp_dir)
    else:
        temp_file = output_file

    read_name = file(input_file,'r').readline().rstrip('\r\n')
    sra = False
    e = read_name.partition(" ")[0]
    if read_name.startswith('@') and ( not(e.endswith('/1') or e.endswith('/2'))):
        sra = True

    if operation == 'change' or sra:
        fid = open(input_file,'r')
        fod = open(temp_file,'w')
        i = 0
        r = 0
        while True:
            gc.disable()
            lines = fid.readlines(size_read_buffer)
            gc.enable()
            if not lines:
                break
            n = len(lines)
            for j in xrange(n):
                r = r + 1
                i = i + 1
                if i == 1:
                    if tag_read:
                        lines[j] = '@%s%s%s\n' % (tag_read ,int2str(r,12) , tag)
                    else: # if there is no tag_read then the original SRA id is left
                        lines[j] = '%s%s\n' % (lines[j][:-1].partition(" ")[0], tag)
                    #lines[j] = lines[j].rstrip('\r\n').upper().split(' ')[1]+tag+'\n'
                elif i == 3:
                    lines[j] = "+\n"
                elif i == 4:
                    i = 0
            fod.writelines(lines)
        fid.close()
        fod.close()
        if phred_conversion == '64':
            phred.fq2fq(temp_file,'sanger',output_file,'illumina-1.5',tmp_dir = tmp_dir)
            os.remove(temp_file)
        elif phred_conversion == '33':
            phred.fq2fq(temp_file,'auto-detect',output_file,'sanger',tmp_dir = tmp_dir)
            os.remove(temp_file)
    else:
        print "No changes are done!"
        if os.path.isfile(output_file):
            os.remove(output_file)
        if operation == 'soft':
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
                os.symlink(linkto,output_file)
            else:
                os.symlink(input_file,output_file)
        elif operation == 'hard':
            linkto = input_file
            if os.path.islink(input_file):
                linkto = os.readlink(input_file)
            try:
                os.link(linkto,output_file)
            except OSError as er:
                print >>sys.stderr,"WARNING: Cannot do hard links ('%s' and '%s')!" % (linkto,output_file)
                shutil.copyfile(linkto,output_file)
#                if er.errno == errno.EXDEV:
#                    # they are on different partitions
#                    # [Errno 18] Invalid cross-device link
#                    shutil.copyfile(linkto,output_file)
#                else:
#                    print >>sys.stderr,"ERROR: Cannot do hard links ('%s' and '%s')!" % (linkto,output_file)
#                    print >>sys.stderr,er
#                    sys.exit(1)

        elif operation == 'copy':
            shutil.copyfile(input_file, output_file)
        else:
            print >>sys.stderr, "ERROR: unknown operation of linking!", operation
            sys.exit(1)
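# Hypothetical invocation (file names assumed): convert an SRA-style FASTQ to
# Illumina-style read names with PHRED+64 qualities, using /tmp for scratch files:
#   sra2illumina('reads_sra.fastq', 'reads_illumina.fastq', tag='/1',
#                phred_conversion='64', tmp_dir='/tmp')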
Beispiel #45
0
def merge_star_chimeric(psl_in, psl_ou):
    #
    psl = []
    fou = None
    if psl_ou == '-':
        fou = sys.stdout
    else:
        fou = open(psl_ou, 'w')
    limit_psl = 10**5

    for box in chunks(psl_in):
        if len(box) == 2:
            if box[0][psl_strand] != box[1][psl_strand]:
                continue
            merged = None

            temp = box[0][:]

            r1_start = int(box[0][psl_qStart])
            r2_start = int(box[1][psl_qStart])
            if r1_start > r2_start:
                box = (box[1], box[0])

            r1_start = int(box[0][psl_qStart])
            r1_end = int(box[0][psl_qEnd])
            r2_start = int(box[1][psl_qStart])
            r2_end = int(box[1][psl_qEnd])

            t1_start = int(box[0][psl_tStart])
            t1_end = int(box[0][psl_tEnd])
            t2_start = int(box[1][psl_tStart])
            t2_end = int(box[1][psl_tEnd])

            if t1_start > t2_start:
                continue

            wiggle = 9
            if r1_end + wiggle > r2_start and r1_end < r2_start:
                dif = r2_start - r1_end

                # extend the first
                #box[0][psl_matches] = str(int(box[0][psl_matches]))
                #box[0][psl_misMatches] = str(int(box[0][psl_misMatches]) + dif)

                box[0][psl_qEnd] = str(int(box[0][psl_qEnd]) + dif)
                box[0][psl_tEnd] = str(int(box[0][psl_tEnd]) + dif)

                t = box[0][psl_blockSizes].split(',')
                t[-2] = str(int(t[-2]) + dif)
                box[0][psl_blockSizes] = ','.join(t)

                # recompute
                r1_start = int(box[0][psl_qStart])
                r1_end = int(box[0][psl_qEnd])

                t1_start = int(box[0][psl_tStart])
                t1_end = int(box[0][psl_tEnd])

            elif r1_end > r2_start and r1_end < r2_start + wiggle:
                dif = r2_start - r1_end

                # cut the second
                box[1][psl_matches] = str(int(box[1][psl_matches]) - dif)
                box[1][psl_misMatches] = str(int(box[1][psl_misMatches]) + dif)

                box[1][psl_qStart] = str(int(box[1][psl_qStart]) + dif)
                box[1][psl_tStart] = str(int(box[1][psl_tStart]) + dif)

                t = box[1][psl_blockSizes].split(',')
                t[0] = str(int(t[0]) - dif)
                box[1][psl_blockSizes] = ','.join(t)

                t = box[1][psl_qStarts].split(',')
                t[0] = str(int(t[0]) + dif)
                box[1][psl_qStarts] = ','.join(t)

                t = box[1][psl_tStarts].split(',')
                t[0] = str(int(t[0]) + dif)
                box[1][psl_tStarts] = ','.join(t)

                # recompute
                r2_start = int(box[1][psl_qStart])
                r2_end = int(box[1][psl_qEnd])

                t2_start = int(box[1][psl_tStart])
                t2_end = int(box[1][psl_tEnd])

            if r1_end <= r2_start and t1_end <= t2_start:  #and box[0][psl_strand] == "+" :
                temp[psl_matches] = int(box[0][psl_matches]) + int(
                    box[1][psl_matches])
                temp[psl_misMatches] = int(box[0][psl_misMatches]) - int(
                    box[1][psl_matches])

                temp[psl_qNumInsert] = int(box[0][psl_qNumInsert]) + int(
                    box[1][psl_qNumInsert])
                temp[psl_qBaseInsert] = int(box[0][psl_qBaseInsert]) + int(
                    box[1][psl_qBaseInsert])
                temp[psl_tNumInsert] = int(box[0][psl_tNumInsert]) + int(
                    box[1][psl_tNumInsert])
                temp[psl_tBaseInsert] = int(box[0][psl_tBaseInsert]) + int(
                    box[1][psl_tBaseInsert])

                temp[psl_qStart] = r1_start
                temp[psl_qEnd] = r2_end

                temp[psl_tStart] = t1_start
                temp[psl_tEnd] = t2_end

                temp[psl_blockCount] = int(box[0][psl_blockCount]) + int(
                    box[1][psl_blockCount])
                temp[psl_blockSizes] = box[0][psl_blockSizes] + box[1][
                    psl_blockSizes]

                temp[psl_qStarts] = box[0][psl_qStarts] + box[1][psl_qStarts]

                temp[psl_tStarts] = box[0][psl_tStarts] + box[1][psl_tStarts]
                temp[psl_tNumInsert] = '1'

                merged = temp

            if merged:
                gc.disable()
                psl.append(map(str, merged))
                gc.enable()
                if len(psl) >= limit_psl:
                    fou.writelines(['\t'.join(line) + '\n' for line in psl])
                    psl = []
    # output PSL
    if psl:
        fou.writelines(['\t'.join(line) + '\n' for line in psl])
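# Worked example with hypothetical coordinates: given the two halves of a chimeric
# read aligned at query positions 0-45 and 50-100 on the same strand, the gap of 5 is
# smaller than wiggle (9), so the first alignment's last block, qEnd and tEnd are
# extended by 5; the two PSL lines are then merged into one by summing the match and
# block counts and concatenating the blockSizes/qStarts/tStarts lists.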
Beispiel #46
0
import os
import numpy as np
from skimage.io import imread
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
# from skimage.util.montage import montage2d as montage
from skimage.morphology import binary_opening, disk
from sklearn.model_selection import train_test_split
from skimage.morphology import label
from keras.preprocessing.image import ImageDataGenerator
from keras import models, layers
import keras.backend as K
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from tqdm import tqdm

import gc; gc.enable()

# montage_rgb = lambda x: np.stack([montage(x[:, :, :, i]) for i in range(x.shape[3])], -1)
ship_dir = 'F:\\Shiga\\kaggle\\AirbusShipDetection'
train_image_dir = os.path.join(ship_dir, 'train')
test_image_dir = os.path.join(ship_dir, 'test')


def multi_rle_encode(img):
    labels = label(img)
    if img.ndim > 2:
        return [rle_encode(np.sum(labels==k, axis=2)) for k in np.unique(labels[labels>0])]
    else:
        return [rle_encode(labels==k) for k in np.unique(labels[labels>0])]
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
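# rle_encode() is referenced above but truncated out of this snippet; a common
# implementation, in the spirit of the kernel linked above, looks like this:
def rle_encode(img):
    # img: binary mask; returns run-length encoding as 'start length' pairs
    # (1-indexed, column-major), matching the Airbus competition format.
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)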
Beispiel #47
0
def post_hook():
    import gc
    gc.enable()
Beispiel #48
0
 def tearDown(self):
     if self.using_gc:
         gc.enable()
Beispiel #49
0
def train(model, task, y_list, x_list, checkpoint_dir, checkpoint_prefix, device, batch_size=512, max_seq_len=100, lr=1e-3, resume_surfix=None, logger=None):
	"""
	: model - torch.nn.module: model to be trained
	: task - list[tuple(int,list[int])]: epoch + file to train
	: y_list - list[str]: list of y variables
	: x_list - list[str]: list of x variables to generate embed sequence for
	: checkpoint_dir - str: path to checkpoint directory
	: checkpoint_prefix - str: prefix of checkpoint file
	: device - torch.device: device to train the model
	: batch_size - int: size of mini batch
	: max_seq_len - int: max length for sequence input, default 100 
	: lr - float: learning rate for Adam, default 1e-3
	: resume_surfix - str: model to reload if not training from scratch
	"""
	global input_split_path, embed_path
	if not gc.isenabled(): gc.enable()

	# Check checkpoint directory
	if not os.path.isdir(checkpoint_dir): os.mkdir(checkpoint_dir)

	# Calculate number of batch
	div, mod = divmod(90000, batch_size)
	batch_per_file = div + min(1, mod)
	batch_per_epoch = 9 * batch_per_file

	# Load model if not train from scratch
	loss_fn = nn.CrossEntropyLoss()
	optimizer = torch.optim.Adam(model.parameters(), lr=lr, amsgrad=True)
	scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=0, threshold=1e-5, threshold_mode='abs')

	if resume_surfix is not None:
		model_artifact_path = os.path.join(checkpoint_dir, '{}_{}.pth'.format(checkpoint_prefix, resume_surfix))
		model.load_state_dict(torch.load(model_artifact_path))
		if logger: logger.info('Model loaded from {}'.format(model_artifact_path))
		optimizer_artifact_path = os.path.join(checkpoint_dir, '{}_{}_opti.pth'.format(checkpoint_prefix, resume_surfix))
		optimizer.load_state_dict(torch.load(optimizer_artifact_path))
		if logger: logger.info('Optimizer loaded from {}'.format(optimizer_artifact_path))

	model.to(device)
	
	# Initiate word vector host
	wv = wv_loader_v2(x_list, embed_path, max_seq_len=max_seq_len)
	if logger: logger.info('Word vector host ready')
	
	# Main Loop
	for epoch, file_idx_list in task:
		if logger:
			logger.info('=========================')
			logger.info('Processing Epoch {}/{}'.format(epoch, task[-1][0]))
			logger.info('=========================')

		# Train model
		model.train()
		train_running_loss, train_n_batch = 0, 0

		for index, split_idx in enumerate(file_idx_list, start=1):
			dl = data_loader_v2(wv, y_list, x_list, input_split_path, split_idx, batch_size=batch_size, shuffle=True)
			it = iter(dl)
			while True:
				try:
					yl, xl, x_seq_len = next(it)
					y = torch.add(yl[0], yl[1], alpha=10).to(device)
					x = [i.to(device) for i in xl] + [x_seq_len-1]

					optimizer.zero_grad()
					yp = F.softmax(model(*x), dim=1)
					loss = loss_fn(yp,y)

					loss.backward()
					torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100)
					optimizer.step()

					train_running_loss += loss.item()
					train_n_batch += 1

				except StopIteration:
					break

				except Exception as e:
					if logger: logger.error(e)
					return 

			del dl, it
			_ = gc.collect()

			if logger:
				logger.info('Epoch {}/{} - File {}/8 Done - Train Loss: {:.6f}, Learning Rate {:.7f}'.format(epoch, task[-1][0], index, train_running_loss/train_n_batch, optimizer.param_groups[0]['lr']))

			# Save model & optimizer state dict
			ck_file_name = '{}_{}_{}.pth'.format(checkpoint_prefix, epoch, split_idx)
			ck_file_path = os.path.join(checkpoint_dir, ck_file_name)
			torch.save(model.state_dict(), ck_file_path)
			op_file_name = '{}_{}_{}_opti.pth'.format(checkpoint_prefix, epoch, split_idx)
			op_file_path = os.path.join(checkpoint_dir, op_file_name)
			torch.save(optimizer.state_dict(), op_file_path)

		torch.cuda.empty_cache()

		# Evaluate model
		model.eval()
		test_running_loss, test_n_batch = 0, 0
		true_y, pred_y = [], []

		with torch.no_grad():
			for split_idx in [9, 10]:
				dl = data_loader_v2(wv, y_list, x_list, input_split_path, split_idx, batch_size=batch_size, shuffle=True)
				it = iter(dl)
				while True:
					try:
						yl, xl, x_seq_len = next(it)
						y = torch.add(yl[0], yl[1], alpha=10).to(device)
						x = [i.to(device) for i in xl] + [x_seq_len-1]
						yp = F.softmax(model(*x), dim=1)
						loss = loss_fn(yp,y)

						pred_y.extend(list(yp.cpu().detach().numpy()))
						true_y.extend(list(y.cpu().detach().numpy()))

						test_running_loss += loss.item()
						test_n_batch += 1

					except StopIteration:
						break

					except Exception as e:
						if logger: logger.error(e)
						return 

				del dl, it
				_ = gc.collect()

		pred = np.argmax(np.array(pred_y), 1)
		true = np.array(true_y).reshape((-1,))
		age_acc = accuracy_score(true%10, pred%10)
		gen_acc = accuracy_score(true//10, pred//10)

		del pred, true, pred_y, true_y
		_ = gc.collect()

		if logger:
			logger.info('Epoch {}/{} Done - Test Loss: {:.6f}, Age Accuracy: {:.6f}, Gender Accuracy: {:.6f}, Combined Accuracy: {:.6f}'.format(
				epoch, task[-1][0], test_running_loss/test_n_batch, age_acc, gen_acc, age_acc+gen_acc))

		scheduler.step(test_running_loss/test_n_batch)
		if logger:
			logger.info('Epoch {}/{} - Updated Learning Rate: {:.8f}'.format(epoch, task[-1][0], optimizer.param_groups[0]['lr']))
Beispiel #50
0
 def _garbageCollect(self, task = None):
     gc.enable()
     gct = GCTrigger()
     gc.disable()
     return Task.cont
Beispiel #51
0
# In[ ]:

import os
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
from skimage.util.montage import montage2d as montage
montage_rgb = lambda x: np.stack(
    [montage(x[:, :, :, i]) for i in range(x.shape[3])], -1)
ship_dir = '../input'
train_image_dir = os.path.join(ship_dir, 'train_v2')
test_image_dir = os.path.join(ship_dir, 'test_v2')
import gc
gc.enable()  # memory is tight

# In[ ]:

masks = pd.read_csv(
    os.path.join('../input/', 'train_ship_segmentations_v2.csv'))
print(masks.shape[0], 'masks found')
print(masks['ImageId'].value_counts().shape[0])
masks['path'] = masks['ImageId'].map(
    lambda x: os.path.join(train_image_dir, x))
masks.head()

# # Split into training and validation groups
# We stratify by the number of boats appearing so we have nice balances in each set

# In[ ]:
Beispiel #52
0
    def buildIndex(self):
        '''main of the program, creates the index'''

        gc.enable()
        self.index = defaultdict(lambda: array('L'))  # main index
        lengths = {}  # for calculating and storing document (cosine) lengths
        for doc in open(wdir + 'documents.list', 'rt'):
            fname = doc.rstrip()  # documents/LN-20020102023.vert
            path = wdir + fname
            f = gzip.open(path + '.gz', 'rt')

            # Parse file into sections and append text
#             parsedDoc = self.parseDoc(f)  # returns a dictionary of parsed xml sections
#             text = ''.join([v for k, v in parsedDoc.items() if v is not None and k != "docid"])
#             docid = parsedDoc["docid"]
#             if docid[0] == 'L':
#                 docid = '1' + docid[7:]     # begins with LN
#             else:
#                 docid = '2' + docid[7:]     # begins with MF
#
#             docid = int(docid)

            docid, text = self.parseDoc(f)
            docid = self.truncateDocid(docid)
#             print("processing doc " + str(docid))

            pattern = (r"^[0-9]+\s+"  # word number
               "([a-zěščřžťďňńáéíýóůA-ZĚŠČŘŽŤĎŇŃÁÉÍÝÓŮ]+)[0-9]*\s+"  # form
               "[a-zěščřžťďňńáéíýóůA-ZĚŠČŘŽŤĎŇŃÁÉÍÝÓŮ]+[0-9]*[-_]?.*\s+"  # lemma
               "[A-ZĚŠČŘŽŤĎŇŃÁÉÍÝÓŮ0-9-=]+\s+"
               "[a-zěščřžťďňńáéíýóůA-ZĚŠČŘŽŤĎŇŃÁÉÍÝÓŮ]+$")

            tokens = re.findall(pattern, text, re.MULTILINE)
            counts = Counter(tokens)
#             print(counts)

            length = 0
            for token, cnt in counts.items():
                idPlusTf = self.combineInts(docid, cnt)
                length += cnt * cnt  # add sqrd components

#                 if token not in self.index:
                self.index[token].append(idPlusTf)  # append a new entry and postings list
                    #self.lexicon[token].append(token)
            lengths[docid] = math.sqrt(length)  # sqrt

#                 else:
#                     self.index[token].append(idPlusTf)
#                   if docid not in postings:
#                     postings.append(idPlusTf)
#                     del postings
            del tokens
            del counts

            gc.collect()

        self.writeIndex()
        self.writeOffsets()
#
#                   length = 0
#             for token, cnt in counts.items():
#                 length += cnt * cnt
#             lengths[docid] = math.sqrt(length)
#             del tokens
#             del counts
#             gc.collect()
#
        with gzip.open(wdir + '/output/lengths.gz', 'wt') as f:
            print("writing doc length")
            for docid, length in lengths.items():
#                 print(self.expandDocid(docid) + '\t' + str(length) + '\n')
                f.write(self.expandDocid(docid) + '\t' + str(length) + '\n')
Beispiel #53
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--only_augmenters",
        type=str,
        help="Names of augmenters to measure, regexes, delimiter is ','.")
    parser.add_argument("--nosave",
                        action="store_true",
                        help="Whether not to save any results")
    args = parser.parse_args()
    if args.only_augmenters is not None:
        args.only_augmenters = [
            name.strip() for name in args.only_augmenters.split(",")
        ]
    args.save = (args.nosave is not True)

    if not args.save:
        print("[NOTE] will not save data")

    iterations = 100
    batch_sizes = [1, 128]
    backgrounds = [False]

    print("---------------------------")
    print("Images")
    print("---------------------------")
    results_images = []
    base_image = skimage.data.astronaut()
    images = [
        ia.imresize_single_image(base_image, (64, 64)),
        ia.imresize_single_image(base_image, (224, 224))
    ]

    for image in images:
        print("")
        print("image size: %s" % (image.shape, ))
        augmenters = create_augmenters(height=image.shape[0],
                                       width=image.shape[1],
                                       height_augmentable=image.shape[0],
                                       width_augmentable=image.shape[1],
                                       only_augmenters=args.only_augmenters)
        for batch_size in batch_sizes:
            if batch_size != batch_sizes[0]:
                print("")
            print("batch_size: %d" % (batch_size, ))

            for background in backgrounds:
                for augmenter in augmenters:
                    images_batch = np.uint8([image] * batch_size)

                    ia.seed(1)
                    times = []
                    gc.disable()  # as done in timeit
                    if not background:
                        for _ in sm.xrange(iterations):
                            time_start = time.time()
                            _img_aug = augmenter.augment_images(images_batch)
                            time_end = time.time()
                            times.append(time_end - time_start)
                    else:
                        batches = [
                            ia.Batch(images=images_batch)
                            for _ in sm.xrange(iterations)
                        ]
                        for _ in sm.xrange(iterations):
                            time_start = time.time()
                            gen = augmenter.augment_batches(batches,
                                                            background=True)
                            for _batch_aug in gen:
                                pass
                            time_end = time.time()
                            times.append(time_end - time_start)
                    gc.enable()

                    results_images.append({
                        "augmentable": "images",
                        "background": background,
                        "image.shape": image.shape,
                        "batch_size": batch_size,
                        "augmenter.name": augmenter.name,
                        "times": times
                    })

                    items_per_sec = (1 / np.average(times)) * batch_size
                    mbit_per_img = (image.size * image.dtype.itemsize *
                                    8) / 1024 / 1024
                    mbit_per_sec = items_per_sec * mbit_per_img
                    print("IMG | HxW=%s B=%d %s "
                          "| SUM %10.5fs "
                          "| ITER avg %10.5fs, min %10.5fs, max %10.5fs "
                          "| img/s %11.3f "
                          "| mbit/s %9.3f, mbyte/s %9.3f "
                          "| %s" %
                          (image.shape[0:2], batch_size, "BG" if background
                           else "FG", float(np.sum(times)), np.average(times),
                           np.min(times), np.max(times), items_per_sec,
                           mbit_per_sec, mbit_per_sec / 8, augmenter.name))

    if args.save:
        current_dir = os.path.dirname(__file__)
        target_dir = os.path.join(current_dir, "measure_performance_results")
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        with open(os.path.join(target_dir, "results_images.pickle"),
                  "wb") as f:
            pickle.dump(results_images, f, protocol=-1)

    print("---------------------------")
    print("Heatmaps")
    print("---------------------------")
    results_heatmaps = []
    for nb_heatmaps in [1, 5]:  # per image
        base_image = skimage.data.astronaut()
        images = [
            ia.imresize_single_image(base_image, (64, 64)),
            ia.imresize_single_image(base_image, (224, 224))
        ]
        heatmaps = [
            np.tile(heatmap[..., 0:1], (1, 1, nb_heatmaps))
            for heatmap in iaa.Grayscale(1.0).augment_images(images)
        ]
        heatmaps_ois = [
            ia.HeatmapsOnImage(heatmap.astype(np.float32) / 255.0,
                               shape=(224, 224, 3)) for heatmap in heatmaps
        ]

        for heatmaps_oi in heatmaps_ois:
            print("")
            print("heatmap size: %s (on image: %s)" % (
                heatmaps_oi.arr_0to1.shape,
                heatmaps_oi.shape,
            ))
            augmenters = create_augmenters(
                height=heatmaps_oi.shape[0],
                width=heatmaps_oi.shape[1],
                height_augmentable=heatmaps_oi.arr_0to1.shape[0],
                width_augmentable=heatmaps_oi.arr_0to1.shape[1],
                only_augmenters=args.only_augmenters)
            for batch_size in batch_sizes:
                if batch_size != batch_sizes[0]:
                    print("")
                print("batch_size: %d" % (batch_size, ))
                for background in backgrounds:
                    for augmenter in augmenters:
                        heatmaps_oi_batch = [heatmaps_oi] * batch_size

                        ia.seed(1)
                        times = []
                        gc.disable()  # as done in timeit
                        if not background:
                            for _ in sm.xrange(iterations):
                                time_start = time.time()
                                _hms_aug = augmenter.augment_heatmaps(
                                    heatmaps_oi_batch)
                                time_end = time.time()
                                times.append(time_end - time_start)
                                gc.collect()
                        else:
                            batches = [
                                ia.Batch(heatmaps=heatmaps_oi_batch)
                                for _ in sm.xrange(iterations)
                            ]
                            for _ in sm.xrange(iterations):
                                time_start = time.time()
                                gen = augmenter.augment_batches(
                                    batches, background=True)
                                for _batch_aug in gen:
                                    pass
                                time_end = time.time()
                                times.append(time_end - time_start)
                                gc.collect()
                        gc.enable()

                        results_heatmaps.append({
                            "augmentable":
                            "heatmaps",
                            "background":
                            background,
                            "nb_heatmaps":
                            nb_heatmaps,
                            "heatmaps_oi.arr_0to1.shape":
                            heatmaps_oi.arr_0to1.shape,
                            "heatmaps_oi.shape":
                            heatmaps_oi.shape,
                            "batch_size":
                            batch_size,
                            "augmenter.name":
                            augmenter.name,
                            "times":
                            times
                        })

                        h, w, c = heatmaps_oi.arr_0to1.shape
                        items_per_sec = (1 /
                                         np.average(times)) * batch_size * c
                        mbit_per_img = (h * w *
                                        heatmaps_oi.arr_0to1.dtype.itemsize *
                                        8) / 1024 / 1024
                        mbit_per_sec = items_per_sec * mbit_per_img
                        print("HMs | HxWxN=%s (on %s) B=%d %s "
                              "| SUM %10.5fs "
                              "| ITER avg %10.5fs, min %10.5fs, max %10.5fs "
                              "| hms/s %11.3f "
                              "| mbit/s %9.3f, mbyte/s %9.3f "
                              "| %s" %
                              (heatmaps_oi.arr_0to1.shape[0:3],
                               heatmaps_oi.shape[0:2], batch_size,
                               "BG" if background else "FG",
                               float(np.sum(times)), np.average(times),
                               np.min(times), np.max(times), items_per_sec,
                               mbit_per_sec, mbit_per_sec / 8, augmenter.name))

    if args.save:
        current_dir = os.path.dirname(__file__)
        target_dir = os.path.join(current_dir, "measure_performance_results")
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        with open(os.path.join(target_dir, "results_heatmaps.pickle"),
                  "wb") as f:
            pickle.dump(results_heatmaps, f, protocol=-1)

    print("---------------------------")
    print("Keypoints")
    print("---------------------------")
    results_keypoints = []
    for nb_points in [1, 10]:  # per image
        base_image = skimage.data.astronaut()
        h, w = base_image.shape[0:2]
        if nb_points == 1:
            keypoints = [
                ia.Keypoint(x=x * w, y=y * h) for y, x in [(0.4, 0.4)]
            ]
        else:
            keypoints = [
                ia.Keypoint(x=x * w, y=y * h)
                for y, x in [(0.2, 0.2), (0.3, 0.3), (0.4, 0.4), (
                    0.6, 0.6), (0.7,
                                0.7), (0.8,
                                       0.8), (0.5,
                                              0.25), (0.5,
                                                      0.75), (0.25,
                                                              0.5), (0.75,
                                                                     0.5)]
            ]
        base_image_kpoi = ia.KeypointsOnImage(keypoints, shape=(224, 224, 3))
        images = [
            ia.imresize_single_image(base_image, (64, 64)),
            ia.imresize_single_image(base_image, (224, 224))
        ]
        keypoints_on_images = [
            base_image_kpoi.on(image.shape) for image in images
        ]

        for keypoints_on_image in keypoints_on_images:
            print("")
            print("#points: %d (on image: %s)" % (
                len(keypoints_on_image.keypoints),
                keypoints_on_image.shape,
            ))
            augmenters = create_augmenters(
                height=keypoints_on_image.shape[0],
                width=keypoints_on_image.shape[1],
                height_augmentable=keypoints_on_image.shape[0],
                width_augmentable=keypoints_on_image.shape[1],
                only_augmenters=args.only_augmenters)
            for batch_size in batch_sizes:
                if batch_size != batch_sizes[0]:
                    print("")
                print("batch_size: %d" % (batch_size, ))
                for background in backgrounds:
                    for augmenter in augmenters:
                        keypoints_on_image_batch = [keypoints_on_image
                                                    ] * batch_size

                        ia.seed(1)
                        times = []
                        gc.disable()  # as done in timeit
                        if not background:
                            for _ in sm.xrange(iterations):
                                time_start = time.time()
                                _kps_aug = augmenter.augment_keypoints(
                                    keypoints_on_image_batch)
                                time_end = time.time()
                                times.append(time_end - time_start)
                                gc.collect()
                        else:
                            batches = [
                                ia.Batch(keypoints=keypoints_on_image_batch)
                                for _ in sm.xrange(iterations)
                            ]
                            for _ in sm.xrange(iterations):
                                time_start = time.time()
                                gen = augmenter.augment_batches(
                                    batches, background=True)
                                for _batch_aug in gen:
                                    pass
                                time_end = time.time()
                                times.append(time_end - time_start)
                        gc.enable()

                        results_keypoints.append({
                            "augmentable":
                            "keypoints",
                            "background":
                            background,
                            "nb_points":
                            len(keypoints_on_image.keypoints),
                            "keypoints_on_image.shape":
                            keypoints_on_image.shape,
                            "batch_size":
                            batch_size,
                            "augmenter.name":
                            augmenter.name,
                            "times":
                            times
                        })

                        items_per_sec = (1 /
                                         np.average(times)) * batch_size * len(
                                             keypoints_on_image.keypoints)
                        mbit_per_img = (len(keypoints_on_image.keypoints) * 2 *
                                        32) / 1024 / 1024
                        mbit_per_sec = items_per_sec * mbit_per_img
                        print("KPs | #points=%d (on %s) B=%d %s "
                              "| SUM %10.5fs "
                              "| ITER avg %10.5fs, min %10.5fs, max %10.5fs "
                              "| kps/s %11.3f "
                              "| mbit/s %9.3f, mbyte/s %9.3f "
                              "| %s" %
                              (len(keypoints_on_image.keypoints),
                               keypoints_on_image.shape[0:2], batch_size,
                               "BG" if background else "FG",
                               float(np.sum(times)), np.average(times),
                               np.min(times), np.max(times), items_per_sec,
                               mbit_per_sec, mbit_per_sec / 8, augmenter.name))

    if args.save:
        current_dir = os.path.dirname(__file__)
        target_dir = os.path.join(current_dir, "measure_performance_results")
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        with open(os.path.join(target_dir, "results_keypoints.pickle"),
                  "wb") as f:
            pickle.dump(results_keypoints, f, protocol=-1)
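# A minimal sketch of the GC handling used in the timing loops above, mirroring what
# timeit does: disable collection around the timed region, then restore the previous
# state. The timed statement is only a placeholder workload.
import gc
import time

gc_was_enabled = gc.isenabled()
gc.disable()
try:
    t0 = time.time()
    sum(range(10**6))          # placeholder workload being measured
    elapsed = time.time() - t0
finally:
    if gc_was_enabled:
        gc.enable()
print("elapsed: %.6fs" % elapsed)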
Beispiel #54
0
    def __execute_child(self, args, executable, preexec_fn, close_fds, cwd,
                        env, universal_newlines, startupinfo, creationflags,
                        shell, p2cread, p2cwrite, c2pread, c2pwrite, errread,
                        errwrite):
        """
        Executes the program using posix_spawn().

        This is based on the method from the superclass but the
        posix_spawn API forces a number of changes.  In particular:

        * When using fork() FDs are manipulated in the child process
          after the fork, but before the program is exec()ed.  With
          posix_spawn() this is done by passing a data-structure to
          the posix_spawn() call, which describes the FD manipulations
          to perform.

        * The fork() version waits until after the fork before
          unsetting the non-blocking flag on the FDs that the child
          has inherited.  In the posix_spawn() version, we cannot
          do that after the fork so we dup the FDs in advance and
          unset the flag on the duped FD, which we then pass to the
          child.
        """

        if preexec_fn is not None:
            raise NotImplementedError("preexec_fn not supported")
        if close_fds:
            raise NotImplementedError("close_fds not implemented")
        if cwd:
            raise NotImplementedError(
                "cwd not implemented")  # pragma: no cover
        if universal_newlines:
            raise NotImplementedError()  # pragma: no cover
        assert startupinfo is None and creationflags == 0

        _log.debug("Pipes: p2c %s, %s; c2p %s, %s; err %s, %s", p2cread,
                   p2cwrite, c2pread, c2pwrite, errread, errwrite)

        if isinstance(args, types.StringTypes):
            args = [args]
        else:
            args = [a.encode("ascii") for a in args]

        if shell:
            args = ["/bin/sh", "-c"] + args
            if executable:
                args[0] = executable

        if executable is None:
            executable = args[0]

        self._loop.install_sigchld()

        # The FileActions object is an ordered list of FD operations for
        # posix_spawn to do in the child process before it execs the new
        # program.
        file_actions = FileActions()

        # In the child, close parent's pipe ends.
        if p2cwrite is not None:
            file_actions.add_close(p2cwrite)
        if c2pread is not None:
            file_actions.add_close(c2pread)
        if errread is not None:
            file_actions.add_close(errread)

        # When duping fds, if there arises a situation where one of the fds
        # is either 0, 1 or 2, it is possible that it is overwritten (#12607).
        fds_to_close_in_parent = []
        if c2pwrite == 0:
            c2pwrite = os.dup(c2pwrite)
            fds_to_close_in_parent.append(c2pwrite)
        if errwrite == 0 or errwrite == 1:
            errwrite = os.dup(errwrite)
            fds_to_close_in_parent.append(errwrite)

        # Dup stdin/out/err FDs in child.
        def _dup2(dup_from, dup_to):
            if dup_from is None:
                # Pass through the existing FD.
                dup_from = dup_to
            # Need to take a dup so we can remove the non-blocking flag
            a_dup = os.dup(dup_from)
            _log.debug("Duped %s as %s", dup_from, a_dup)
            fds_to_close_in_parent.append(a_dup)
            self._remove_nonblock_flag(a_dup)
            file_actions.add_dup2(a_dup, dup_to)

        _dup2(p2cread, 0)
        _dup2(c2pwrite, 1)
        _dup2(errwrite, 2)

        # Close pipe fds in the child.  Make sure we don't close the same fd
        # more than once, or standard fds.
        for fd in set([p2cread, c2pwrite, errwrite]):
            if fd > 2:
                file_actions.add_close(fd)

        gc_was_enabled = gc.isenabled()
        # FIXME Does this bug apply to posix_spawn version?
        try:
            # Disable gc to avoid bug where gc -> file_dealloc ->
            # write to stderr -> hang.  http://bugs.python.org/issue1336
            gc.disable()
            self.pid = posix_spawnp(
                executable,
                args,
                file_actions=file_actions,
                env=env,
            )
        except:
            if gc_was_enabled:
                gc.enable()
            raise
        finally:
            for fd in fds_to_close_in_parent:
                os.close(fd)

        # Capture the SIGCHLD.
        self._watcher = self._loop.child(self.pid)
        self._watcher.start(self._on_child, self._watcher)

        if gc_was_enabled:
            gc.enable()

        # Close the Child's pipe ends in the parent.
        if p2cread is not None and p2cwrite is not None:
            os.close(p2cread)
        if c2pwrite is not None and c2pread is not None:
            os.close(c2pwrite)
        if errwrite is not None and errread is not None:
            os.close(errwrite)
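# A minimal sketch of the same idea using only the standard library's os.posix_spawn
# (Python 3.8+): FD manipulations for the child are described as data (file_actions)
# rather than performed between fork() and exec(). The command and descriptors below
# are placeholders, not taken from the original example.
import os

r, w = os.pipe()
file_actions = [
    (os.POSIX_SPAWN_DUP2, w, 1),     # child's stdout becomes the pipe's write end
    (os.POSIX_SPAWN_CLOSE, w),       # then close the original descriptor in the child
    (os.POSIX_SPAWN_CLOSE, r),       # the child does not need the read end
]
pid = os.posix_spawn('/bin/echo', ['echo', 'hello'], os.environ,
                     file_actions=file_actions)
os.close(w)
print(os.read(r, 1024))              # b'hello\n'
os.close(r)
os.waitpid(pid, 0)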
Beispiel #55
0
def sort_columns(input_filename='-',
                 output_filename='-',
                 columns=None,
                 header=False,
                 ignore_case=False,
                 unique=False,
                 tmp_dir=None,
                 buffer_size='80%',
                 parallel=multiprocessing.cpu_count(),
                 compress_program=None):
    """
    It sorts the input file (text tab separated file) based on the specified columns.
    It works like SELECT * ORDER BY in SQL.
    """
    import locale

    locale.setlocale(locale.LC_ALL, 'C')

    # check options supported by SORT command
    sh1 = give_me_temp_filename(tmp_dir)
    sh2 = give_me_temp_filename(tmp_dir)
    # check options supported by SORT command
    sort_parallel = False
    r = os.system("sort --help | grep 'parallel' > '%s'" % (sh1, ))
    if (not r) and (not empty(sh1)) and len(file(sh1, 'r').readlines()) == 1:
        sort_parallel = True
    delete_file(sh1)
    # check options supported by SORT command
    sort_buffer = False
    r = os.system("sort --help | grep 'buffer-size' > '%s'" % (sh1, ))
    if (not r) and (not empty(sh1)) and len(file(sh1, 'r').readlines()) == 1:
        sort_buffer = True
    delete_file(sh1)
    # check options supported by SORT command
    sort_compress = False
    if compress_program:
        r = os.system(
            "sort --help | grep 'compress-program' > '%s' ; %s --help 2>/dev/null | grep -i 'compress' > '%s'"
            % (sh1, compress_program, sh2))
        if (not r) and (
            (not empty(sh1)) and len(file(sh1, 'r').readlines()) == 1 and
            (not empty(sh2)) and len(file(sh2, 'r').readlines()) >= 1):
            sort_compress = True
        delete_file(sh1)
        delete_file(sh2)

    # treat the case when the input file is coming from the standard input
    fin = input_filename.strip('"').strip("'")
    if fin == '-':
        fin = give_me_temp_filename(tmp_dir)
        fod = open(fin, 'w')
        fid = sys.stdin
        while True:
            lines = fid.readlines(10**8)
            if not lines:
                break
            fod.writelines(lines)
        fod.close()

    fon = output_filename.strip('"').strip("'")
    if fon == '-':
        fon = give_me_temp_filename(tmp_dir)

    if header:
        header_saved = file(fin, 'r').readline()
        file(output_filename, 'w').write(header_saved)
    else:
        file(output_filename, 'w').write('')

    # process the type of the column, numeric, or string
    first_line = file(fin, 'r').readline()
    if first_line:
        nc = len(file(fin, 'r').readline().rstrip('\r\n').split(
            '\t'))  #read first line in order to find out the number of columns
        if columns:
            columns = columns.strip().lower()
            if columns == 'd':
                columns = ','.join([str(i + 1) + 'd' for i in range(nc)])
            elif columns == 'n':
                columns = ','.join([str(i + 1) + 'n' for i in range(nc)])
            elif columns == 'nd' or columns == 'dn':
                columns = ','.join([str(i + 1) + 'nd' for i in range(nc)])
        else:
            columns = ','.join([str(i + 1) for i in range(nc)])

        # extra parameters
        extra = ""

        if sort_buffer and buffer_size and buffer_size != 'no' and buffer_size != 'none':
            extra = extra + ' --buffer-size=' + str(buffer_size) + ' '
        if sort_parallel and parallel and parallel > 1:
            extra = extra + ' --parallel=' + str(parallel) + ' '
        if sort_compress and compress_program and compress_program.lower(
        ) != 'no' and compress_program.lower() != 'none':
            extra = extra + ' --compress-program=' + compress_program + ' '

        # processing the input columns
        columns = [
            '-k ' + el + ',' + el.replace('n', '').replace('r', '')
            for el in columns.replace('d', 'r').split(',')
        ]
        comd = "-s -t '\t' " + " ".join(columns)
        if ignore_case:
            comd = "-f " + comd
        if unique:
            comd = "-u " + comd
        if tmp_dir:
            comd = "-T '" + tmp_dir + "' " + comd
        if header:
            comd = "LC_ALL=C sed 1d '" + fin + "' | LC_ALL=C sort " + extra + comd + " >> '" + fon + "'"
        else:
            comd = "LC_ALL=C sort " + extra + comd + " '" + fin + "' >> '" + fon + "'"
        r = os.system(comd)
        if r != 0:
            print >> sys.stderr, "ERROR (sort_ttdb.py) while running:"
            print >> sys.stderr, comd
            sys.exit(1)

    if input_filename == '-':
        os.remove(fin)

    if output_filename == '-':
        fod = sys.stdout
        fid = open(fon, 'r')
        while True:
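            # keep the collector off while readlines() allocates a large batch
            # of small string objects; this avoids pointless GC passes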
            gc.disable()
            lines = fid.readlines(10**8)
            gc.enable()
            if not lines:
                break
            fod.writelines(lines)
        fid.close()
        os.remove(fon)
# # 1. Import and Reshape Data
# First we load the necessary Python packages and then we import the CSV files that were provided by Instacart.
#
# ## 1.1 Import the required packages
# The garbage collector (package gc) attempts to reclaim garbage, i.e. memory occupied by objects (e.g., DataFrames) that are no longer in use by Python ([ref1](https://www.techopedia.com/definition/1083/garbage-collection-gc-general-programming), [ref2](https://en.wikipedia.org/wiki/Garbage_collection_(computer_science))). This package helps us stay within the 16 GB of RAM that Kaggle offers.
#
# The **"as"** reserved word is to define an alias to the package. The alias help us to call easier a package in our code.

# In[2]:

# For data manipulation
import pandas as pd

# Garbage Collector to free up memory
import gc
gc.enable()  # Activate
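
# A minimal sketch (not part of the original notebook) of the pattern that gc
# supports here: drop references to large temporary objects as soon as they are
# no longer needed, then call gc.collect() to sweep up any remaining garbage.
_tmp = list(range(10 ** 6))  # hypothetical large temporary object
del _tmp                     # release the only reference so it can be freed
gc.collect()                 # collect whatever reference counting did not catch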

# ## 1.2 Load data from the CSV files
# Instacart provides six CSV files, which we load into Python with the .read_csv() function from the pandas package. Each .read_csv() call returns a DataFrame.

# In[3]:

orders = pd.read_csv('orders.csv')
order_products_train = pd.read_csv('order_products__train.csv')
order_products_prior = pd.read_csv('order_products__prior.csv')
products = pd.read_csv('products.csv')
aisles = pd.read_csv('aisles.csv')
departments = pd.read_csv('departments.csv')
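
# A quick check (illustrative, not part of the original notebook): each
# .read_csv() call above returned a pandas DataFrame.
print(type(orders))   # <class 'pandas.core.frame.DataFrame'>
print(orders.shape)   # (number of rows, number of columns) of the orders table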

# This step results in the following DataFrames:
# * <b>orders</b>: This table includes all orders, namely prior, train, and test. It has a single primary key (<b>order_id</b>); see the quick check below.
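
# A simple way to verify the primary-key claim (illustrative, not from the notebook):
print(orders['order_id'].is_unique)   # True when order_id uniquely identifies each row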
Beispiel #57
0
    def _fork(self, path, uid, gid, executable, args, environment, **kwargs):
        """
        Fork and then exec sub-process.

        @param path: the path where to run the new process.
        @type path: L{bytes} or L{unicode}
        @param uid: if defined, the uid used to run the new process.
        @type uid: L{int}
        @param gid: if defined, the gid used to run the new process.
        @type gid: L{int}
        @param executable: the executable to run in a new process.
        @type executable: L{str}
        @param args: arguments used to create the new process.
        @type args: L{list}.
        @param environment: environment used for the new process.
        @type environment: L{dict}.
        @param kwargs: keyword arguments to L{_setupChild} method.
        """
        collectorEnabled = gc.isenabled()
        gc.disable()
        try:
            self.pid = os.fork()
        except:
            # Still in the parent process
            if collectorEnabled:
                gc.enable()
            raise
        else:
            if self.pid == 0:
                # A return value of 0 from fork() indicates that we are now
                # executing in the child process.

                # Do not put *ANY* code outside the try block. The child
                # process must either exec or _exit. If it gets outside this
                # block (due to an exception that is not handled here, but
                # which might be handled higher up), there will be two copies
                # of the parent running in parallel, doing all kinds of damage.

                # After each change to this code, review it to make sure there
                # are no exit paths.

                try:
                    # Stop debugging. If I am, I don't care anymore.
                    sys.settrace(None)
                    self._setupChild(**kwargs)
                    self._execChild(path, uid, gid, executable, args,
                                    environment)
                except:
                    # If there are errors, try to write something descriptive
                    # to stderr before exiting.

                    # The parent's stderr isn't *necessarily* fd 2 anymore, or
                    # even still available; however, even libc assumes that
                    # write(2, err) is a useful thing to attempt.

                    try:
                        stderr = os.fdopen(2, 'wb')
                        msg = ("Upon execvpe {0} {1} in environment id {2}"
                               "\n:").format(executable, str(args),
                                             id(environment))

                        if _PY3:

                            # On Python 3, print_exc takes a text stream, but
                            # on Python 2 it still takes a byte stream.  So on
                            # Python 3 we will wrap up the byte stream returned
                            # by os.fdopen using TextIOWrapper.

                            # We hard-code UTF-8 as the encoding here, rather
                            # than looking at something like
                            # getfilesystemencoding() or sys.stderr.encoding,
                            # because we want an encoding that will be able to
                            # encode the full range of code points.  We are
                            # (most likely) talking to the parent process on
                            # the other end of this pipe and not the filesystem
                            # or the original sys.stderr, so there's no point
                            # in trying to match the encoding of one of those
                            # objects.

                            stderr = io.TextIOWrapper(stderr, encoding="utf-8")

                        stderr.write(msg)
                        traceback.print_exc(file=stderr)
                        stderr.flush()

                        for fd in xrange(3):
                            os.close(fd)
                    except:
                        # Handle all errors during the error-reporting process
                        # silently to ensure that the child terminates.
                        pass

                # See comment above about making sure that we reach this line
                # of code.
                os._exit(1)

        # we are now in parent process
        if collectorEnabled:
            gc.enable()
        self.status = -1  # this records the exit status of the child