Example #1
import os
import shutil
import cPickle

import numpy
import theano
from theano.gof.utils import flatten


def cleanup():
    """
    Delete keys in old format from the compiledir.

    We define keys in the old format as keys that contain an ndarray;
    newer keys store a hash of the constant data instead.

    If there is no key left for a compiled module, we delete the module.
    """
    compiledir = theano.config.compiledir
    for directory in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, directory, "key.pkl")
                file = open(filename, 'rb')
                #print file
                try:
                    keydata = cPickle.load(file)
                    for key in list(keydata.keys):
                        for obj in flatten(key):
                            if isinstance(obj, numpy.ndarray):
                                keydata.remove_key(key)
                                break
                    if len(keydata.keys) == 0:
                        shutil.rmtree(os.path.join(compiledir, directory))

                except EOFError:
                    print ("ERROR while reading this key file '%s'."
                           " Delete its directory" % filename)
            except IOError:
                pass
        finally:
            if file is not None:
                file.close()
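
Every snippet in this listing relies on a flatten helper (in Theano it lives in theano.gof.utils). For readers without Theano at hand, here is a minimal sketch of what such a helper does; it is for illustration and is not the library's exact implementation:

def flatten(a):
    """Recursively flatten nested containers into a flat list.

    A minimal stand-in for theano.gof.utils.flatten, shown only so the
    examples in this listing are self-contained.
    """
    if isinstance(a, (list, tuple, set, frozenset)):
        result = []
        for item in a:
            result.extend(flatten(item))
        return result
    return [a]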
Example #2
    def __init__(self, window_size=10, t_in=3, w=10, h=10, d=1, t_out=3, hidden_layers_sizes=[3]):
        '''
        Initialize the model.

        :param window_size: number of past frames kept in the sliding-window dataset
        :param t_in: number of past frames fed into the DNN
        :param w: width of each frame
        :param h: height of each frame
        :param d: number of channels per frame
        :param t_out: number of future frames the DNN outputs
        :param hidden_layers_sizes: number of units in each hidden layer
        :return:
        '''
        self.window_size = window_size
        self.t_in = t_in
        self.w = w
        self.h = h
        self.d = d
        self.t_out = t_out
        self.dataset = [ numpy.zeros((d,h,w), dtype=theano.config.floatX) for i in xrange(window_size) ]

        numpy_rng = numpy.random.RandomState(1000)
        theano_rng = RandomStreams(seed=1000)

        # For each value k in hidden_layers_sizes, build a filter bank of shape
        # (k, k_prev, 3, 3): k filters of size 3x3 over k_prev input channels.
        filter_shapes = []
        k_prev = d
        for k in hidden_layers_sizes:
            filter_shapes += [(k,k_prev,3,3)]
            k_prev = k

        # self.model = dnn.SdAIndividual(numpy_rng, n=n, w=w, h=h, d=d, hidden_layers_sizes=hidden_layers_sizes)
        # self.model = dnn.SdAFullyConnected(numpy_rng, n=n, w=w, h=h, d=d, hidden_layers_sizes=hidden_layers_sizes)

        # When using StackedLSTM, hidden_layers_sizes must be [...] + [n_ins].
        # self.model = dnn.StackedLSTM(numpy_rng, t_in=t_in, d=d, w=w, h=h, hidden_layers_sizes=hidden_layers_sizes)

        # In StackedConvLSTM the hidden layer size is fixed to the input size, but
        # the number of parameters (number and size of the filters) can be chosen freely.
        # self.model = dnn.StackedConvLSTM(numpy_rng, t_in=t_in, d=d, w=w, h=h, filter_shapes=filter_shapes)

        # When using EncoderDecoderLSTM, hidden_layers_sizes must be [n_ins] + [...] + [n_ins].
        # self.model = dnn.EncoderDecoderLSTM(numpy_rng, t_in=t_in, d=d, w=w, h=h, t_out=t_out, hidden_layers_sizes=hidden_layers_sizes)

        # In EncoderDecoderConvLSTM the hidden layer size is fixed to the input size, but
        # the number of parameters (number and size of the filters) can be chosen freely.
        self.model = dnn.EncoderDecoderConvLSTM(numpy_rng, theano_rng, t_in=t_in, d=d, w=w, h=h, t_out=t_out, filter_shapes=filter_shapes)

        n_params = sum([p.get_value().size for p in flatten(self.model.dnn.params)])
        print('model: num of parameters={0}'.format(n_params))

        print('Building pretrain function...'),
        self.f_pretrain = self.model.build_pretrain_function()
        print('done')

        print('Building finetune function...'),
        self.f_grad_shared, self.f_update = self.model.build_finetune_function()
        print('done')

        print('Building predict function...'),
        self.f_predict = self.model.build_prediction_function()
        print('done')
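
The method above belongs to a predictor class whose name and enclosing module are not shown, and dnn is a project-local module. A hypothetical construction sketch (the class name Predictor is an assumption for illustration; the arguments mirror the signature above):

# Hypothetical: assumes the __init__ above belongs to a class named Predictor
# and that the project-local dnn module is on the import path.
predictor = Predictor(window_size=10, t_in=3, w=10, h=10, d=1, t_out=3,
                      hidden_layers_sizes=[3])
# After construction, predictor.f_pretrain, predictor.f_grad_shared,
# predictor.f_update and predictor.f_predict hold the compiled Theano functions.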
Example #3
def cleanup():
    """
    Delete keys in old format from the compiledir.

    The clean-up removes keys in the old format, i.e. keys that:
    1) contain an ndarray (newer keys store a hash of the constant data
       instead);
    2) do not record the numpy ABI version;
    3) do not record a compiler version string.

    If there is no key left for a compiled module, we delete the module.
    """
    compiledir = theano.config.compiledir
    for directory in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, directory, "key.pkl")
                file = open(filename, 'rb')
                #print file
                try:
                    keydata = cPickle.load(file)
                    for key in list(keydata.keys):
                        have_npy_abi_version = False
                        have_c_compiler = False
                        for obj in flatten(key):
                            if isinstance(obj, numpy.ndarray):
                                keydata.remove_key(key)
                                break
                            elif isinstance(obj, basestring):
                                if obj.startswith('NPY_ABI_VERSION=0x'):
                                    have_npy_abi_version = True
                                elif obj.startswith('c_compiler_str='):
                                    have_c_compiler = True

                        if not have_npy_abi_version or not have_c_compiler:
                            keydata.remove_key(key)
                    if len(keydata.keys) == 0:
                        shutil.rmtree(os.path.join(compiledir, directory))

                except EOFError:
                    print(
                        "ERROR while reading this key file '%s'."
                        " Delete its directory" % filename)
            except IOError:
                pass
        finally:
            if file is not None:
                file.close()
Example #4
    def build_finetune_function(self, optimizer=O.my_rmsprop):
        '''
        Build the fine-tuning functions.

        :param optimizer: the optimizer to use
        :return: a tuple (f_grad_shared, f_update) of compiled Theano functions
        '''
        learning_rate = T.scalar('lr', dtype=theano.config.floatX)

        y = self.get_target() # y is of shape (n_timesteps, n_samples, n_feature_maps, height, width)
        z = self.get_output() # z is of shape (n_timesteps, n_samples, n_feature_maps, height, width)

        n_samples = y.shape[1]

        mse = T.sum((y - z)**2) # sum of squared errors (unnormalized MSE; unused below)
        cee = T.sum(-(y * T.log(z) + (1.0-y) * T.log(1.0-z))) # summed binary cross-entropy
        # cee2= T.sum(-(y * T.log(z) + (1.0-y) * T.log(1.0-z))+(y * T.log(y) + (1.0-y) * T.log(1.0-y)))
        cost = cee
        params = flatten(self.dnn.params)
        grads = T.grad(cost, params)

        f_grad_shared, f_update = optimizer(learning_rate, params, grads,
                                            self.dnn.x, self.dnn.mask, self.dnn.y, cost)

        return (f_grad_shared, f_update)
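
The pair of functions returned here follows the split used in the Theano LSTM tutorial: f_grad_shared computes the minibatch cost and stores the gradients in shared variables, and f_update then applies one optimizer step at a given learning rate. A hedged training-loop sketch (model, iterate_minibatches, n_epochs, train_set and the batch variables are assumptions for illustration):

# Hypothetical driver loop around build_finetune_function().
f_grad_shared, f_update = model.build_finetune_function()
lr = 0.001
for epoch in range(n_epochs):
    for x_batch, mask_batch, y_batch in iterate_minibatches(train_set):
        cost = f_grad_shared(x_batch, mask_batch, y_batch)  # forward pass + gradients
        f_update(lr)                                        # apply one update step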
Example #5
import logging
import os
import pickle
import shutil

import numpy as np
import theano
from six import string_types
from theano.gof.utils import flatten

_logger = logging.getLogger(__name__)


def cleanup():
    """
    Delete keys in old format from the compiledir.

    The clean-up removes keys in the old format or built against an old
    version of the c_code, i.e. keys that:
    1) contain an ndarray (newer keys store a hash of the constant data
       instead);
    2) do not record the numpy ABI version;
    3) do not record a compiler version string.

    If there is no key left for a compiled module, we delete the module.

    """
    compiledir = theano.config.compiledir
    for directory in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, directory, "key.pkl")
                file = open(filename, "rb")
                # print file
                try:
                    keydata = pickle.load(file)
                    for key in list(keydata.keys):
                        have_npy_abi_version = False
                        have_c_compiler = False
                        for obj in flatten(key):
                            if isinstance(obj, np.ndarray):
                                # Reuse have_npy_abi_version to
                                # force the removing of key
                                have_npy_abi_version = False
                                break
                            elif isinstance(obj, string_types):
                                if obj.startswith("NPY_ABI_VERSION=0x"):
                                    have_npy_abi_version = True
                                elif obj.startswith("c_compiler_str="):
                                    have_c_compiler = True
                            elif (isinstance(obj, (theano.gof.Op, theano.gof.Type))
                                  and hasattr(obj, "c_code_cache_version")):
                                v = obj.c_code_cache_version()
                                if v not in [(), None] and v not in key[0]:
                                    # Reuse have_npy_abi_version to
                                    # force the removing of key
                                    have_npy_abi_version = False
                                    break

                        if not have_npy_abi_version or not have_c_compiler:
                            try:
                                # This can happen when we move the compiledir.
                                if keydata.key_pkl != filename:
                                    keydata.key_pkl = filename
                                keydata.remove_key(key)
                            except IOError:
                                _logger.error(
                                    "Could not remove file '%s'. To complete "
                                    "the clean-up, please remove manually "
                                    "the directory containing it.",
                                    filename,
                                )
                    if len(keydata.keys) == 0:
                        shutil.rmtree(os.path.join(compiledir, directory))

                except (EOFError, AttributeError):
                    _logger.error(
                        "Could not read key file '%s'. To complete "
                        "the clean-up, please remove manually "
                        "the directory containing it.",
                        filename,
                    )
            except IOError:
                _logger.error(
                    "Could not clean up this directory: '%s'. To complete "
                    "the clean-up, please remove it manually.",
                    directory,
                )
        finally:
            if file is not None:
                file.close()
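
For orientation, the markers this loop searches for are plain strings nested inside each key tuple. A schematic of a key (the exact layout of real keys varies across Theano versions; this shape is an assumption for illustration):

# Schematic only: real keys are deeply nested tuples produced by Theano's
# C linker, and their exact layout varies between versions.
key = (
    ("NPY_ABI_VERSION=0x1000009",   # sets have_npy_abi_version
     "c_compiler_str=gcc 7.5.0"),   # sets have_c_compiler
    # ... Ops, Types and c_code cache-version tuples follow ...
)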
Example #6
import logging
import os
import pickle

import theano
from theano.gof.utils import flatten

_logger = logging.getLogger(__name__)

# print_title is a small formatting helper; a sketch appears after this example.


def print_compiledir_content():
    """
    Print the list of compiled individual ops in theano.config.compiledir.
    """
    max_key_file_size = 1 * 1024 * 1024  # 1M

    compiledir = theano.config.compiledir
    table = []
    table_multiple_ops = []
    table_op_class = {}
    zeros_op = 0
    big_key_files = []
    total_key_sizes = 0
    nb_keys = {}
    for dir in os.listdir(compiledir):
        filename = os.path.join(compiledir, dir, "key.pkl")
        if not os.path.exists(filename):
            continue
        with open(filename, "rb") as file:
            try:
                keydata = pickle.load(file)
                ops = list(
                    set([
                        x for x in flatten(keydata.keys)
                        if isinstance(x, theano.gof.Op)
                    ]))
                # Whatever the case, we count compilations for OP classes.
                for op_class in set([op.__class__ for op in ops]):
                    table_op_class.setdefault(op_class, 0)
                    table_op_class[op_class] += 1
                if len(ops) == 0:
                    zeros_op += 1
                else:
                    types = list(
                        set([
                            x for x in flatten(keydata.keys)
                            if isinstance(x, theano.gof.Type)
                        ]))
                    compile_start = compile_end = float("nan")
                    for fn in os.listdir(os.path.join(compiledir, dir)):
                        if fn.startswith("mod.c"):
                            compile_start = os.path.getmtime(
                                os.path.join(compiledir, dir, fn))
                        elif fn.endswith(".so"):
                            compile_end = os.path.getmtime(
                                os.path.join(compiledir, dir, fn))
                    compile_time = compile_end - compile_start
                    if len(ops) == 1:
                        table.append((dir, ops[0], types, compile_time))
                    else:
                        ops_to_str = "[%s]" % ", ".join(
                            sorted(str(op) for op in ops))
                        types_to_str = "[%s]" % ", ".join(
                            sorted(str(t) for t in types))
                        table_multiple_ops.append(
                            (dir, ops_to_str, types_to_str, compile_time))

                size = os.path.getsize(filename)
                total_key_sizes += size
                if size > max_key_file_size:
                    big_key_files.append((dir, size, ops))

                nb_keys.setdefault(len(keydata.keys), 0)
                nb_keys[len(keydata.keys)] += 1
            except IOError:
                pass
            except AttributeError:
                _logger.error("Could not read key file '%s'.", filename)

    print_title("Theano cache: %s" % compiledir, overline="=", underline="=")
    print()

    print_title("List of %d compiled individual ops" % len(table),
                underline="+")
    print_title(
        "sub dir/compiletime/Op/set of different associated Theano types",
        underline="-")
    table = sorted(table, key=lambda t: str(t[1]))
    for dir, op, types, compile_time in table:
        print(dir, "%.3fs" % compile_time, op, types)

    print()
    print_title("List of %d compiled sets of ops" % len(table_multiple_ops),
                underline="+")
    print_title(
        "sub dir/compiletime/Set of ops/set of different associated Theano types",
        underline="-",
    )
    table_multiple_ops = sorted(table_multiple_ops, key=lambda t: (t[1], t[2]))
    for dir, ops_to_str, types_to_str, compile_time in table_multiple_ops:
        print(dir, "%.3fs" % compile_time, ops_to_str, types_to_str)

    print()
    print_title(
        ("List of %d compiled Op classes and "
         "the number of times they got compiled" % len(table_op_class)),
        underline="+",
    )
    table_op_class = sorted(table_op_class.items(), key=lambda t: t[1])
    for op_class, nb in table_op_class:
        print(op_class, nb)

    if big_key_files:
        big_key_files = sorted(big_key_files, key=lambda t: str(t[1]))
        big_total_size = sum([sz for _, sz, _ in big_key_files])
        print(("There are directories with key files bigger than %d bytes "
               "(they probably contain big tensor constants)" %
               max_key_file_size))
        print(("They use %d bytes out of %d (total size used by all key files)"
               "" % (big_total_size, total_key_sizes)))

        for dir, size, ops in big_key_files:
            print(dir, size, ops)

    nb_keys = sorted(nb_keys.items())
    print()
    print_title("Number of keys for a compiled module", underline="+")
    print_title("number of keys/number of modules with that number of keys",
                underline="-")
    for n_k, n_m in nb_keys:
        print(n_k, n_m)
    print()
    print(("Skipped %d files that contained 0 op "
           "(are they always theano.scalar ops?)" % zeros_op))
Example #7
def print_compiledir_content():
    max_key_file_size = 1 * 1024 * 1024  # 1M

    compiledir = theano.config.compiledir
    table = []
    more_than_one_ops = 0
    zeros_op = 0
    big_key_files = []
    total_key_sizes = 0
    nb_keys = {}
    for dir in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, dir, "key.pkl")
                file = open(filename, 'rb')
                keydata = cPickle.load(file)
                ops = list(
                    set([
                        x for x in flatten(keydata.keys)
                        if isinstance(x, theano.gof.Op)
                    ]))
                if len(ops) == 0:
                    zeros_op += 1
                elif len(ops) > 1:
                    more_than_one_ops += 1
                else:
                    types = list(
                        set([
                            x for x in flatten(keydata.keys)
                            if isinstance(x, theano.gof.Type)
                        ]))
                    table.append((dir, ops[0], types))

                size = os.path.getsize(filename)
                total_key_sizes += size
                if size > max_key_file_size:
                    big_key_files.append((dir, size, ops))

                nb_keys.setdefault(len(keydata.keys), 0)
                nb_keys[len(keydata.keys)] += 1
            except IOError:
                pass
        finally:
            if file is not None:
                file.close()

    print "List of %d compiled individual ops in this theano cache %s:" % (
        len(table), compiledir)
    print "sub directory/Op/a set of the different associated Theano type"
    table = sorted(table, key=lambda t: str(t[1]))
    table_op_class = {}
    for dir, op, types in table:
        print dir, op, types
        table_op_class.setdefault(op.__class__, 0)
        table_op_class[op.__class__] += 1

    print
    print(
        "List of %d individual compiled Op classes and "
        "the number of times they got compiled" % len(table_op_class))
    table_op_class = sorted(table_op_class.iteritems(), key=lambda t: t[1])
    for op_class, nb in table_op_class:
        print op_class, nb

    if big_key_files:
        big_key_files = sorted(big_key_files, key=lambda t: str(t[1]))
        big_total_size = sum([size for dir, size, ops in big_key_files])
        print(
            "There are directories with key files bigger than %d bytes "
            "(they probably contain big tensor constants)" % max_key_file_size)
        print(
            "They use %d bytes out of %d (total size used by all key files)"
            "" % (big_total_size, total_key_sizes))

        for dir, size, ops in big_key_files:
            print dir, size, ops

    nb_keys = sorted(nb_keys.iteritems())
    print
    print "Number of keys for a compiled module"
    print "number of keys/number of modules with that number of keys"
    for n_k, n_m in nb_keys:
        print n_k, n_m

    print(
        "Skipped %d files that contained more than"
        " 1 op (was compiled with the C linker)" % more_than_one_ops)
    print(
        "Skipped %d files that contained 0 op "
        "(are they always theano.scalar ops?)" % zeros_op)
Example #8
def cleanup():
    """
    Delete keys in old format from the compiledir.

    The clean-up removes keys in the old format or built against an old
    version of the c_code, i.e. keys that:
    1) contain an ndarray (newer keys store a hash of the constant data
       instead);
    2) do not record the numpy ABI version;
    3) do not record a compiler version string.

    If there is no key left for a compiled module, we delete the module.
    """
    compiledir = theano.config.compiledir
    for directory in os.listdir(compiledir):
        file = None
        try:
            try:
                filename = os.path.join(compiledir, directory, "key.pkl")
                file = open(filename, 'rb')
                #print file
                try:
                    keydata = cPickle.load(file)
                    for key in list(keydata.keys):
                        have_npy_abi_version = False
                        have_c_compiler = False
                        for obj in flatten(key):
                            if isinstance(obj, numpy.ndarray):
                                have_npy_abi_version = False
                                break
                            elif isinstance(obj, basestring):
                                if obj.startswith('NPY_ABI_VERSION=0x'):
                                    have_npy_abi_version = True
                                elif obj.startswith('c_compiler_str='):
                                    have_c_compiler = True
                            elif (isinstance(obj, (theano.gof.Op, theano.gof.Type)) and
                                  hasattr(obj, 'c_code_cache_version')):
                                v = obj.c_code_cache_version()
                                if v not in [(), None] and v not in key[0]:
                                    have_npy_abi_version = False
                                    break

                        if not have_npy_abi_version or not have_c_compiler:
                            try:
                                #This can happen when we move the compiledir.
                                if keydata.key_pkl != filename:
                                    keydata.key_pkl = filename
                                keydata.remove_key(key)
                            except IOError:
                                _logger.error(
                                    "Could not remove file '%s'. To complete "
                                    "the clean-up, please remove manually "
                                    "the directory containing it.",
                                    filename)
                    if len(keydata.keys) == 0:
                        shutil.rmtree(os.path.join(compiledir, directory))

                except EOFError:
                    _logger.error(
                        "Could not read key file '%s'. To complete "
                        "the clean-up, please remove manually "
                        "the directory containing it.",
                        filename)
            except IOError:
                _logger.error(
                    "Could not clean up this directory: '%s'. To complete "
                    "the clean-up, please remove it manually.",
                    directory)
        finally:
            if file is not None:
                file.close()
Example #9
def print_compiledir_content():
    """
    print list of %d compiled individual ops in the "theano.config.compiledir"
    """
    max_key_file_size = 1 * 1024 * 1024  # 1M

    compiledir = theano.config.compiledir
    table = []
    table_multiple_ops = []
    table_op_class = {}
    zeros_op = 0
    big_key_files = []
    total_key_sizes = 0
    nb_keys = {}
    for dir in os.listdir(compiledir):
        filename = os.path.join(compiledir, dir, "key.pkl")
        if not os.path.exists(filename):
            continue
        with open(filename, 'rb') as file:
            try:
                keydata = pickle.load(file)
                ops = list(set([x for x in flatten(keydata.keys)
                                if isinstance(x, theano.gof.Op)]))
                # Whatever the case, we count compilations for OP classes.
                for op_class in set([op.__class__ for op in ops]):
                    table_op_class.setdefault(op_class, 0)
                    table_op_class[op_class] += 1
                if len(ops) == 0:
                    zeros_op += 1
                else:
                    types = list(set([x for x in flatten(keydata.keys)
                                      if isinstance(x, theano.gof.Type)]))
                    compile_start = compile_end = float('nan')
                    for fn in os.listdir(os.path.join(compiledir, dir)):
                        if fn.startswith('mod.c'):
                            compile_start = os.path.getmtime(
                                os.path.join(compiledir, dir, fn))
                        elif fn.endswith('.so'):
                            compile_end = os.path.getmtime(
                                os.path.join(compiledir, dir, fn))
                    compile_time = compile_end - compile_start
                    if len(ops) == 1:
                        table.append((dir, ops[0], types, compile_time))
                    else:
                        ops_to_str = '[%s]' % ', '.join(sorted(str(op) for op in ops))
                        types_to_str = '[%s]' % ', '.join(sorted(str(t) for t in types))
                        table_multiple_ops.append((dir, ops_to_str, types_to_str, compile_time))

                size = os.path.getsize(filename)
                total_key_sizes += size
                if size > max_key_file_size:
                    big_key_files.append((dir, size, ops))

                nb_keys.setdefault(len(keydata.keys), 0)
                nb_keys[len(keydata.keys)] += 1
            except IOError:
                pass
            except AttributeError:
                _logger.error(
                    "Could not read key file '%s'.",
                    filename)

    print_title("Theano cache: %s" % compiledir, overline='=', underline='=')
    print()

    print_title("List of %d compiled individual ops" % len(table), underline='+')
    print_title("sub dir/compiletime/Op/set of different associated Theano types", underline='-')
    table = sorted(table, key=lambda t: str(t[1]))
    for dir, op, types, compile_time in table:
        print(dir, '%.3fs' % compile_time, op, types)

    print()
    print_title("List of %d compiled sets of ops" % len(table_multiple_ops), underline='+')
    print_title("sub dir/compiletime/Set of ops/set of different associated Theano types", underline='-')
    table_multiple_ops = sorted(table_multiple_ops, key=lambda t: (t[1], t[2]))
    for dir, ops_to_str, types_to_str, compile_time in table_multiple_ops:
        print(dir, '%.3fs' % compile_time, ops_to_str, types_to_str)

    print()
    print_title(("List of %d compiled Op classes and "
                 "the number of times they got compiled" % len(table_op_class)), underline='+')
    table_op_class = sorted(iteritems(table_op_class), key=lambda t: t[1])
    for op_class, nb in table_op_class:
        print(op_class, nb)

    if big_key_files:
        big_key_files = sorted(big_key_files, key=lambda t: str(t[1]))
        big_total_size = sum([sz for _, sz, _ in big_key_files])
        print(("There are directories with key files bigger than %d bytes "
               "(they probably contain big tensor constants)" %
               max_key_file_size))
        print(("They use %d bytes out of %d (total size used by all key files)"
               "" % (big_total_size, total_key_sizes)))

        for dir, size, ops in big_key_files:
            print(dir, size, ops)

    nb_keys = sorted(iteritems(nb_keys))
    print()
    print_title("Number of keys for a compiled module", underline='+')
    print_title("number of keys/number of modules with that number of keys", underline='-')
    for n_k, n_m in nb_keys:
        print(n_k, n_m)
    print()
    print(("Skipped %d files that contained 0 op "
           "(are they always theano.scalar ops?)" % zeros_op))
Example #10
def print_compiledir_content():
    """
    print list of %d compiled individual ops in the "theano.config.compiledir"
    """
    max_key_file_size = 1 * 1024 * 1024  # 1M

    compiledir = theano.config.compiledir
    table = []
    more_than_one_ops = 0
    zeros_op = 0
    big_key_files = []
    total_key_sizes = 0
    nb_keys = {}
    for dir in os.listdir(compiledir):
        filename = os.path.join(compiledir, dir, "key.pkl")
        if not os.path.exists(filename):
            continue
        with open(filename, 'rb') as file:
            try:
                keydata = pickle.load(file)
                ops = list(set([x for x in flatten(keydata.keys)
                                if isinstance(x, theano.gof.Op)]))
                if len(ops) == 0:
                    zeros_op += 1
                elif len(ops) > 1:
                    more_than_one_ops += 1
                else:
                    types = list(set([x for x in flatten(keydata.keys)
                                      if isinstance(x, theano.gof.Type)]))
                    compile_start = compile_end = float('nan')
                    for fn in os.listdir(os.path.join(compiledir, dir)):
                        if fn.startswith('mod.c'):
                            compile_start = os.path.getmtime(
                                os.path.join(compiledir, dir, fn))
                        elif fn.endswith('.so'):
                            compile_end = os.path.getmtime(
                                os.path.join(compiledir, dir, fn))
                    compile_time = compile_end - compile_start
                    table.append((dir, ops[0], types, compile_time))

                size = os.path.getsize(filename)
                total_key_sizes += size
                if size > max_key_file_size:
                    big_key_files.append((dir, size, ops))

                nb_keys.setdefault(len(keydata.keys), 0)
                nb_keys[len(keydata.keys)] += 1
            except IOError:
                pass

    print("List of %d compiled individual ops in this theano cache %s:" % (
        len(table), compiledir))
    print("sub dir/compiletime/Op/set of different associated Theano types")
    table = sorted(table, key=lambda t: str(t[1]))
    table_op_class = {}
    for dir, op, types, compile_time in table:
        print(dir, '%.3fs' % compile_time, op, types)
        table_op_class.setdefault(op.__class__, 0)
        table_op_class[op.__class__] += 1

    print()
    print(("List of %d individual compiled Op classes and "
           "the number of times they got compiled" % len(table_op_class)))
    table_op_class = sorted(iteritems(table_op_class), key=lambda t: t[1])
    for op_class, nb in table_op_class:
        print(op_class, nb)

    if big_key_files:
        big_key_files = sorted(big_key_files, key=lambda t: str(t[1]))
        big_total_size = sum([sz for _, sz, _ in big_key_files])
        print(("There are directories with key files bigger than %d bytes "
               "(they probably contain big tensor constants)" %
               max_key_file_size))
        print(("They use %d bytes out of %d (total size used by all key files)"
               "" % (big_total_size, total_key_sizes)))

        for dir, size, ops in big_key_files:
            print(dir, size, ops)

    nb_keys = sorted(iteritems(nb_keys))
    print()
    print("Number of keys for a compiled module")
    print("number of keys/number of modules with that number of keys")
    for n_k, n_m in nb_keys:
        print(n_k, n_m)

    print(("Skipped %d files that contained more than"
           " 1 op (was compiled with the C linker)" % more_than_one_ops))
    print(("Skipped %d files that contained 0 op "
           "(are they always theano.scalar ops?)" % zeros_op))