Beispiel #1
0
 def getMemSize(self):
     """
     Estimate this job's total size in memory.

     Relies on the optional asizeof module when it was importable.
     @return: estimated size in memory, or None if asizeof is unavailable
     """
     if asizeof is None:
         return None
     return asizeof.asizeof(self)
Beispiel #2
0
 def getMemSize(self):
     """
     Try to guess the size in memory of a job.

     Uses the optional asizeof module when it was importable; when
     asizeof is None this method implicitly returns None.
     @return: expected size in memory, or None if asizeof is unavailable
     """
     if asizeof is not None:
         return asizeof.asizeof(self)
Beispiel #3
0
def simsize():
    """
    Estimate the memory footprint of topo.sim via asizeof.asizeof().

    The figure excludes numpy arrays and may omit other significant
    items, so treat it as a rough estimate only.

    Python 2.6 supports getsizeof() and a __sizeof__ attribute that user
    code can implement, which should provide a more accurate estimate.
    """
    import asizeof
    import topo
    return asizeof.asizeof(topo.sim)
Beispiel #4
0
def simsize():
    """
    Return the size of topo.sim reported by asizeof.asizeof().

    This estimate does not currently include any numpy arrays, and
    may also be missing other important items.

    Python 2.6 supports getsizeof() and a __sizeof__ attribute that user
    code can implement, which should provide a more accurate estimate.
    """
    import asizeof, topo
    return asizeof.asizeof(topo.sim)
def _generate_file(distribution, N, outFile, relevanceFile=None):
    global _sampling_functions
    genfunc = _sampling_functions[distribution]
    dataset = [genfunc() for i in range(N)]

    print "Generating dataset", outFile
    print "N =", N
    db = Database()
    if hasattr(dataset[0], 'featureNames'):
        db.keys = dataset[0].featureNames()
    else:
        for i, f in enumerate(dataset[0].features()):
            db.keys.append("f" + str(i))
    db.keys.append("label")
    for x in dataset:
        db.entries.append(x.features() + [x.concept()])
    db.writeCSV(outFile)
    print "Nfeatures =", len(db.entries[0]) - 1
    print "Fraction of positive examples =", float(
        sum(f[-1] for f in db.entries)) / len(db.entries)
    print "Size of training set (mb)", float(asizeof(db.entries)) / 1024 / 1024
    print "Size of training example (bytes)", asizeof(db.entries[0])

    if relevanceFile != None:
        dbr = Database()
        dbr.keys = [v + "_relevance" for v in db.keys[:-1]]
        for x in dataset:
            dbr.entries.append([int(v) for v in x.relevance()])

        nrel = 0
        ntotal = 0
        for e in dbr.entries:
            nrel += sum(e)
            ntotal += len(e)
        print "Fraction of relevant features:", float(nrel) / float(ntotal)
        dbr.writeCSV(relevanceFile)
Beispiel #6
0
# Python 2 example script: profile code2.g under heimdall, then scan all
# GC-tracked objects with asizeof looking for unusually large ones.
import heimdall
heimdall = reload(heimdall)  # reload() so repeated interactive runs pick up edits
import data_flow
data_flow = reload(data_flow)
import watcher
watcher = reload(watcher)
import optimizer
optimizer = reload(optimizer)
from tests import code2
import asizeof, gc

# Run code2.g(5) under the profiler with shallow call nesting.
heim = heimdall.Heimdall(max_nesting=2, prof_shot_time=4)
heim.run('code2.g(5)', globals(), locals())

sizes = []
print "number of objects is ", len(gc.get_objects())
biggest, biggest_size = None, 0
# Walk every object the garbage collector tracks, recording any whose
# deep size exceeds ~1 MB and remembering the single largest one seen.
for (i, o) in enumerate(gc.get_objects()):
    if i % 100 == 0:
        print i  # progress indicator -- the full scan is slow
    try:
        size = asizeof.asizeof(o)
        if size > 1000000:
            print o, size
            sizes.append((o, size))
        if size > biggest_size:
            print "new biggest", size, o
            biggest, biggest_size = o, size
    except:
        # best-effort: some objects cannot be sized; skip them silently
        pass
Beispiel #7
0

# Python 2 example script: compare the memory footprint (via asizeof) of a
# pynlpl FrequencyList and Classer built from a corpus file.
from pynlpl.statistics import FrequencyList
from pynlpl.textprocessors import crude_tokenizer, Classer
import sys
import codecs
import asizeof

# Build a token frequency list from the corpus given on the command line.
freqlist = FrequencyList()
f = codecs.open(sys.argv[1], 'r','utf-8')
for line in f:
    line = crude_tokenizer(line.strip())
    freqlist.append(line)    
f.close()

print "FREQLIST:               " ,asizeof.asizeof(freqlist)




# NOTE(review): presumably a Classer maps tokens to compact integer classes
# by frequency -- confirm against pynlpl documentation.
classer = Classer(freqlist)
print "CLASSER:                " ,asizeof.asizeof(classer)

# Decoder-only variant (per the constructor flags) for size comparison.
classer2 = Classer(freqlist, False,True)
print "CLASSER (ONLY DECODER): " ,asizeof.asizeof(classer2)

# Re-read the corpus, this time counting class-encoded token sequences.
# (The example is cut off here by the scrape; the file is never closed
# in the visible fragment.)
freqlist2 = FrequencyList()
f = codecs.open(sys.argv[1], 'r','utf-8')
for line in f:
    line = crude_tokenizer(line.strip())
    freqlist2.append(classer.encodeseq(line))    
# NOTE(review): the scrape is garbled here -- this fragment duplicates the
# heimdall profiling example above but starts mid-script: data_flow and
# heimdall are used without a visible import in this fragment.
data_flow = reload(data_flow)
import watcher

watcher = reload(watcher)
import optimizer

optimizer = reload(optimizer)
from tests import code2
import asizeof, gc

# Run code2.g(5) under the profiler with shallow call nesting.
heim = heimdall.Heimdall(max_nesting=2, prof_shot_time=4)
heim.run("code2.g(5)", globals(), locals())


sizes = []
print "number of objects is ", len(gc.get_objects())
biggest, biggest_size = None, 0
# Scan every GC-tracked object, recording those over ~1 MB deep size and
# tracking the single largest one.
for (i, o) in enumerate(gc.get_objects()):
    if i % 100 == 0:
        print i  # progress indicator
    try:
        size = asizeof.asizeof(o)
        if size > 1000000:
            print o, size
            sizes.append((o, size))
        if size > biggest_size:
            print "new biggest", size, o
            biggest, biggest_size = o, size
    except:
        # best-effort: some objects cannot be sized; skip them silently
        pass