Example #1
    def __init__(self, filename):
        """
        Initialize a FileService and create train, valid, and test filenames from the given base filename.
        If a file exists with the same name, delete it.

        Parameters
        ----------
        filename : str
            Base filepath to use for the train, valid, and test files.
        """
        assert isinstance(
            filename, string_types
        ), "input filename needs to be a string, found %s" % str(
            type(filename))
        self.value_separator = os.linesep
        filename = os.path.realpath(filename)
        basedir = os.path.dirname(filename)
        mkdir_p(basedir)
        # create the appropriate train, valid, test versions of the file
        name = os.path.basename(filename)
        name, ext = os.path.splitext(name)
        self.train_filename = os.path.join(basedir, name + '_train' + ext)
        self.valid_filename = os.path.join(basedir, name + '_valid' + ext)
        self.test_filename = os.path.join(basedir, name + '_test' + ext)
        # delete the files if they already exist
        if os.path.exists(self.train_filename):
            os.remove(self.train_filename)
        if os.path.exists(self.valid_filename):
            os.remove(self.valid_filename)
        if os.path.exists(self.test_filename):
            os.remove(self.test_filename)
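A brief usage sketch (hypothetical; it assumes this __init__ belongs to a class named FileService, as the docstring suggests):

# Hypothetical usage sketch: one base path yields the three split filenames.
fs = FileService('data/corpus.txt')
# fs.train_filename -> <realpath>/data/corpus_train.txt
# fs.valid_filename -> <realpath>/data/corpus_valid.txt
# fs.test_filename  -> <realpath>/data/corpus_test.txt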
Example #2
def ensure_downloads(url=DATA_URL,target_dir=DEFAULT_CODEGOLF_DATASET_PATH):
    """Ensure that all of the given files have been downloaded and/or unpacked"""
    file_ops.mkdir_p( target_dir )
    expected = os.path.join( target_dir, 'train','yes0.wav')
    if not os.path.exists( expected ):
        archive = os.path.join( target_dir, TAR_FILE )
        if not os.path.exists( archive ) or os.stat( archive ).st_size != FILE_SIZE:
            log.info("Downloading codegolf dataset to %s", target_dir )
            if not file_ops.download_file(
                DATA_URL,
                archive,
            ):
                raise RuntimeError( "Unable to download %s to %s"%(
                    DATA_URL,
                    archive,
                ))
        if sys.version_info.major == 3:
            log.info("Using Python 3.x lzma support to unpack")
            file_ops.untar(archive, target_dir, mode='r:xz')
        else:
            log.warn("Attempting decompresion/unpacking via tar command" )
            subprocess.check_call( ['tar', '-xJf', archive])
        if not os.path.exists( expected ):
            raise RuntimeError("Untarring the source file did not create %s"%(expected,))
    log.info("CodeGolf Yes/No dataset is installed in %s"%(target_dir,))
    return True
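A hedged usage sketch, relying only on the names already visible above (ensure_downloads, DEFAULT_CODEGOLF_DATASET_PATH):

# Hypothetical usage: make sure the dataset is present, then reference a sample.
if ensure_downloads():
    sample = os.path.join(DEFAULT_CODEGOLF_DATASET_PATH, 'train', 'yes0.wav')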
Example #3
    def __init__(self, filename):
        """
        Initialize a FileService and create empty train, valid, and test files from the given base filename.

        Parameters
        ----------
        filename : str
            Base filepath to use for the train, valid, and test files.
        """
        assert isinstance(filename, string_types), "input filename needs to be a string, found %s" % str(type(filename))
        self.value_separator = os.linesep
        filename = os.path.realpath(filename)
        basedir = os.path.dirname(filename)
        mkdir_p(basedir)
        # create the appropriate train, valid, test versions of the file
        name = os.path.basename(filename)
        name, ext = os.path.splitext(name)
        self.train_filename = os.path.join(basedir, name+'_train'+ext)
        self.valid_filename = os.path.join(basedir, name+'_valid'+ext)
        self.test_filename  = os.path.join(basedir, name+'_test'+ext)
        # init the files to be empty
        # write empty bytes (not str) so binary mode works under Python 2 and 3
        with open(self.train_filename, 'wb') as f:
            f.write(b'')
        with open(self.valid_filename, 'wb') as f:
            f.write(b'')
        with open(self.test_filename, 'wb') as f:
            f.write(b'')
Example #4
    def __init__(self, filename):
        """
        Initialize a FileService and create train, valid, and test filenames from the given base filename.
        If a file exists with the same name, delete it.

        Parameters
        ----------
        filename : str
            Base filepath to use for the train, valid, and test files.
        """
        assert isinstance(filename, string_types), "input filename needs to be a string, found %s" % str(type(filename))
        self.value_separator = os.linesep
        filename = os.path.realpath(filename)
        basedir = os.path.dirname(filename)
        mkdir_p(basedir)
        # create the appropriate train, valid, test versions of the file
        name = os.path.basename(filename)
        name, ext = os.path.splitext(name)
        self.train_filename = os.path.join(basedir, name+'_train'+ext)
        self.valid_filename = os.path.join(basedir, name+'_valid'+ext)
        self.test_filename  = os.path.join(basedir, name+'_test'+ext)
        # delete the files if they already exist
        if os.path.exists(self.train_filename):
            os.remove(self.train_filename)
        if os.path.exists(self.valid_filename):
            os.remove(self.valid_filename)
        if os.path.exists(self.test_filename):
            os.remove(self.test_filename)
Example #5
    def __init__(self, filename):
        """
        Initialize a FileService and create empty train, valid, and test files from the given base filename.

        Parameters
        ----------
        filename : str
            Base filepath to use for the train, valid, and test files.
        """
        assert isinstance(
            filename, string_types
        ), "input filename needs to be a string, found %s" % str(
            type(filename))
        self.value_separator = os.linesep
        filename = os.path.realpath(filename)
        basedir = os.path.dirname(filename)
        mkdir_p(basedir)
        # create the appropriate train, valid, test versions of the file
        name = os.path.basename(filename)
        name, ext = os.path.splitext(name)
        self.train_filename = os.path.join(basedir, name + '_train' + ext)
        self.valid_filename = os.path.join(basedir, name + '_valid' + ext)
        self.test_filename = os.path.join(basedir, name + '_test' + ext)
        # init the files to be empty
        # write empty bytes (not str) so binary mode works under Python 2 and 3
        with open(self.train_filename, 'wb') as f:
            f.write(b'')
        with open(self.valid_filename, 'wb') as f:
            f.write(b'')
        with open(self.test_filename, 'wb') as f:
            f.write(b'')
Example #6
def ensure_downloads(files,base_url=BASE_URL,target_dir=DEFAULT_LIBRISPEECH_DATASET_PATH):
    """Ensure that all of the given files have been downloaded and/or unpacked"""
    log.info("Downloading librispeech to %s", target_dir )
    file_ops.mkdir_p( target_dir )
    for filename in files:
        final_filename = os.path.join( target_dir, filename )
        log.info("Ensuring download: %s", final_filename)
        filesize = FILE_SIZES.get( filename, 'Unknown Size')
        # anything non-string here is a byte count; avoids the Python 2-only 'long' name
        size_desc = filesize if isinstance(filesize, str) else file_ops.human_bytes(filesize)
        if filename in DIRECTORY_NAMES:
            without_extension = os.path.join( target_dir, DIRECTORY_NAMES[filename])
        else:
            without_extension = final_filename[:-7]
        
        if not os.path.exists( without_extension ):
            if (not os.path.exists( final_filename )) or not( os.stat(final_filename).st_size == filesize):
                final_url = base_url + filename
                log.info("Need to download %s (%s)", final_url,size_desc )
                if not file_ops.download_file(
                    final_url,
                    final_filename,
                ):
                    raise RuntimeError("Unable to download %s to %s"%(
                        final_url,final_filename,
                    ))
            working = tempfile.mkdtemp(dir=target_dir,prefix="unpack-",suffix="-tmp")
            try:
                file_ops.untar(final_filename, working)
                text_files = []
                for name in glob.glob(os.path.join(working,'LibriSpeech','*')):
                    if os.path.basename( name ) == os.path.basename(without_extension):
                        os.rename( name, without_extension )
                    elif os.path.splitext(name)[1].upper() == '.TXT':
                        text_files.append( name )
                    else:
                        log.warn("Unexpected directory in %s: %r",final_filename, name)
                for text_file in text_files:
                    os.rename( text_file, os.path.join( without_extension, os.path.basename(text_file)))
                if not os.path.exists( without_extension ):
                    raise RuntimeError(
                        "Unable to find the directory %s expected from %s"%(
                            without_extension,
                            final_filename,
                        )
                    )
            finally:
                shutil.rmtree( working )
Example #7
    def setUp(self):
        # create test directories and files
        self.base = "test_filestream_dir/"
        train = self.base + "train/"
        valid = self.base + "valid/"
        test = self.base + "test/"
        mkdir_p(train)
        mkdir_p(valid)
        mkdir_p(test)
        # some files
        with open(train + "train1.txt", "w") as f:
            f.write("TRAIN1a\ntrain1b\n\n")
        with open(train + "train2.txt", "w") as f:
            f.write("TRAIN2a\ntrain2b\n\n")
        with open(train + "train3.md", "w") as f:
            f.write("TRAIN3a\ntrain3b\n\n")

        with open(valid + "valid1.txt", "w") as f:
            f.write("valid1a\nvalid1b\n\n")
        with open(valid + "valid2.txt", "w") as f:
            f.write("valid2a\nvalid2b\n\n")
        with open(valid + "valid3.md", "w") as f:
            f.write("valid3a\nvalid3b\n\n")

        with open(test + "test1.txt", "w") as f:
            f.write("test1a\ntest1b\n\n")
        with open(test + "test2.txt", "w") as f:
            f.write("test2a\ntest2b\n\n")
        with open(test + "test3.md", "w") as f:
            f.write("test3a\ntest3b\n\n")
Example #8
    def setUp(self):
        # create test directories and files
        self.base = "test_filestream_dir/"
        train = self.base+"train/"
        valid = self.base+"valid/"
        test = self.base+"test/"
        mkdir_p(train)
        mkdir_p(valid)
        mkdir_p(test)
        # some files
        with open(train+"train1.txt", "w") as f:
            f.write("TRAIN1a\ntrain1b\n\n")
        with open(train + "train2.txt", "w") as f:
            f.write("TRAIN2a\ntrain2b\n\n")
        with open(train + "train3.md", "w") as f:
            f.write("TRAIN3a\ntrain3b\n\n")

        with open(valid + "valid1.txt", "w") as f:
            f.write("valid1a\nvalid1b\n\n")
        with open(valid + "valid2.txt", "w") as f:
            f.write("valid2a\nvalid2b\n\n")
        with open(valid + "valid3.md", "w") as f:
            f.write("valid3a\nvalid3b\n\n")

        with open(test + "test1.txt", "w") as f:
            f.write("test1a\ntest1b\n\n")
        with open(test + "test2.txt", "w") as f:
            f.write("test2a\ntest2b\n\n")
        with open(test + "test3.md", "w") as f:
            f.write("test3a\ntest3b\n\n")
Example #9
    def setUp(self):
        # get a logger for this session
        self.log = logging.getLogger(__name__)
        self.dir = "filedataset_test_files"
        self.single_file_dir = os.path.join(self.dir, "target_in_file")
        mkdir_p(self.single_file_dir)
        # create files
        self.data0 = [[1,2,3,4,5],[2,3,4,5,6]]
        self.data1 = [[6,7,8,9,0],[7,8,9,0,1]]
        with open(os.path.join(self.single_file_dir, "1.txt"), 'w') as f:
            f.write("1,2,3,4,5\t0\n6,7,8,9,0\t1")
        with open(os.path.join(self.single_file_dir, "2.txt"), 'w') as f:
            f.write("2,3,4,5,6\t0\n7,8,9,0,2\t1")

        self.data_files = os.path.join(self.dir, "target_in_filename")
        mkdir_p(self.data_files)
        # create files
        self.cat = "I am a feline! \nMeow."
        self.dog = "I am a canine! \nWoof."
        with open(os.path.join(self.data_files, "cat.txt"), 'w') as f:
            f.write(self.cat)
        with open(os.path.join(self.data_files, "dog.txt"), 'w') as f:
            f.write(self.dog)
Example #10
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None,
                 input_size=None, output_size=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs_hook and hiddens_hook (if applicable)
        to set your inputs and hidden representation in a modular fashion, allowing models to link together.
        inputs_hook is a tuple of (shape, variable) that should replace the default model inputs.
        hiddens_hook is a tuple of (shape, variable) that should replace the default model hidden representation
        (which means you need to adapt creating your computation graph to not care about the inputs and to instead
        run outputs directly from the hidden variable provided).
        You can also accept a params_hook to share model parameters rather than instantiate a new set of parameters.

        Parameters
        ----------
        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        input_size : int or shape tuple
            The dimensionality of the input for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer.
        output_size : int or shape tuple
            The dimensionality of the output for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer. Currently, we cannot
            run the size from Theano's graph, so it needs to be explicit.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and output_size.
        """
        log.info("Creating a new instance of %s", str(type(self)))

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs_hook  = inputs_hook
        self.hiddens_hook = hiddens_hook
        self.params_hook  = params_hook
        self.input_size   = input_size
        self.output_size  = output_size
        self.outdir       = outdir

        # make sure outdir ends in a directory separator
        if self.outdir and self.outdir[-1] != os.sep:
            self.outdir += os.sep

        # Combine arguments that could specify input_size -> overwrite input_size with inputs_hook[0] if it exists.
        if self.inputs_hook and self.inputs_hook[0] is not None:
            self.input_size = self.inputs_hook[0]

        # Check if the input_size wasn't provided - if this is the case, it could either be a programmer's error
        # or it could be during the automatic stacking in a Container. Since that is a common use case, set
        # the input_size to 1 to avoid errors when instantiating the model.
        if not self.input_size:
            # Could be error, or more commonly, when adding models to a Container
            log.warning("No input_size or inputs_hook! Make sure this is done in a Container. Setting input_size"
                        "=1 for the Container now...")
            self.input_size = 1

        # Also, check if no output_size was given - this could be the case for generative models. Copy input_size
        # in that case.
        if not self.output_size:
            # Could be an error (hopefully not), so give the warning.
            log.warning("No output_size given! Make sure this is from a generative model (where output_size is the "
                        "same as input_size. Setting output_size=input_size now...")
            self.output_size = self.input_size

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['input_size']  = self.input_size
        self.args['output_size'] = self.output_size

        # Now create the directory for outputs of the model
        # set up base path for the outputs of the model during training, etc.
        self.args['outdir'] = self.outdir
        if self.args['outdir']:
            mkdir_p(self.args['outdir'])

        # log the arguments.
        log.info("%s self.args: %s", str(type(self)), str(self.args))
        # save the arguments.
        self.save_args()
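To illustrate the linking the docstring describes, here is a hypothetical sketch; DAE, Softmax, and get_hiddens() are assumed names for this illustration, not confirmed API:

# Hypothetical linking sketch: route one model's hidden layer into another.
dae = DAE(input_size=784)                   # assumed Model subclass
softmax = Softmax(                          # assumed Model subclass
    inputs_hook=(1000, dae.get_hiddens()),  # (shape, variable); 1000 = assumed n_hidden
    output_size=10,
)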
Example #11
    def __init__(self, inputs=None, hiddens=None, outputs=None,
                 params=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs and hiddens SharedVariables (if applicable)
        to set your inputs and hidden representation in a modular fashion, allowing models to link together.
        inputs can have a tuple of (shape, variable) that should replace the default model inputs.
        hiddens can have a tuple of (shape, variable) that should replace the default model hidden representation
        (which means you need to adapt creating your computation graph to not care about the inputs and to instead
        run outputs directly from the hidden variable provided).
        You can also accept a params to share model parameters rather than instantiate a new set of parameters.

        Parameters
        ----------
        inputs : List of [int or shape_tuple or Tuple of (shape, SharedVariable) or None]
            The dimensionality of the inputs for this model, and/or the routing information for the model
            to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the inputs i.e. n_in).
        hiddens : List of [int or shape_tuple or Tuple of (shape, SharedVariable) or None], optional
            The dimensionality of the hidden representation for this model, and/or the routing information for
            the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the hiddens i.e. n_hidden).
        outputs : List of [int or shape tuple], optional
            The dimensionality of the output(s) for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer. Currently, we cannot
            run the size from Theano's graph, so it needs to be explicit.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str, optional
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict, optional
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and outputs.
        """
        self._classname = self.__class__.__name__
        log.info("Creating a new instance of %s", self._classname)

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs = raise_to_list(inputs)
        self.hiddens = raise_to_list(hiddens)
        self.output_size = raise_to_list(outputs)
        self.params = params
        self.outdir = outdir

        # make the directory to output configuration and parameters from the model
        if self.outdir:
            self.outdir = os.path.realpath(self.outdir)
            mkdir_p(self.outdir)

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['inputs'] = self.inputs
        self.args['hiddens'] = self.hiddens
        self.args['output_size'] = self.output_size
        self.args['params'] = self.params
        self.args['outdir'] = self.outdir

        # log the arguments.
        log.info("%s self.args: %s", self._classname, str(self.args))
        # save the arguments.
        self.save_args()
        # Boom! Hyperparameters are now dealt with. Take that!

        # Don't know the position of switches!
        self.switches_on = None
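A sketch of the forms the list-valued inputs argument accepts per the docstring (SomeModel and x_variable are hypothetical names):

# Hypothetical sketch of the accepted `inputs` forms:
model_a = SomeModel(inputs=[784])                        # plain int dimensionality
model_b = SomeModel(inputs=[((None, 784), x_variable)])  # (shape, SharedVariable) hook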
Example #12
    def __init__(self, inputs=None, hiddens=None, outputs=None,
                 params=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs and hiddens Theano symbolic expressions
        or variables (if applicable) to set your inputs and hidden representation in a modular fashion,
        allowing models to link together. `inputs` can have a tuple of (shape, variable) that should replace
        the default model inputs. hiddens can have a tuple of (shape, variable) that should replace the
        default model hidden representation (which means you need to adapt creating your computation graph
        to not care about the inputs and to instead run outputs directly from the hidden variable provided).
        You can also accept a params to share model parameters rather than instantiate a new set of parameters.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`) or Model] or None
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)]. If a :class:`Model` is given as the input, it replaces
            the tuple with zip(Model.output_size, Model.get_outputs()).
        hiddens : List of [tuple(shape, `Theano.TensorType`) or shape] or None, optional
            The dimensionality of the hidden representation for this model, and/or the routing information for
            the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the hiddens i.e. n_hidden). This shape
            information is the same format as the monad for `inputs`.
        outputs : List of [int or shape tuple], optional
            The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str, optional
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict, optional
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and outputs.
        """
        self._classname = self.__class__.__name__
        log.info("Creating a new instance of %s", self._classname)

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            ins = []
            # deal with Models or ModifyLayers being passed as an input.
            for input in self.inputs:
                if hasattr(input, 'output_size') and hasattr(input, 'get_outputs'):
                    sizes = raise_to_list(input.output_size)
                    outs = raise_to_list(input.get_outputs())
                    if len(sizes) == 1 and len(sizes) < len(outs):
                        sizes = sizes * len(outs)
                    input = raise_to_list(zip(sizes, outs))
                    for i in input:
                        ins.append(i)
                else:
                    ins.append(input)
            # replace self.inputs
            self.inputs = ins

        self.hiddens = raise_to_list(hiddens)
        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.params = params or {}
        self.outdir = outdir

        # make the directory to output configuration and parameters from the model
        if self.outdir:
            self.outdir = os.path.realpath(self.outdir)
            mkdir_p(self.outdir)

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['inputs'] = self.inputs
        self.args['hiddens'] = self.hiddens
        if self.output_size is not None:
            self.args['output_size'] = self.output_size
        self.args['params'] = self.params
        self.args['outdir'] = self.outdir

        # log the arguments.
        log.info("%s self.args: %s", self._classname, str(self.args))
        # save the arguments.
        self.save_args()
        # Boom! Hyperparameters are now dealt with. Take that!

        # Don't know the position of switches!
        self.switches_on = None
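A hedged sketch of the Model-as-input form that the loop above expands via zip(Model.output_size, Model.get_outputs()); Encoder, Decoder, and x are hypothetical names:

# Hypothetical chaining sketch: a Model passed in `inputs` is replaced by
# (output_size, output_variable) pairs, so layers stack without manual wiring.
encoder = Encoder(inputs=[((None, 784), x)])
decoder = Decoder(inputs=[encoder])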
Example #13
    def __init__(self, config=None, defaults=None,
                 inputs_hook=None, hiddens_hook=None, params_hook=None,
                 output_size=None,
                 outdir=None,
                 **kwargs):
        """
        This creates the model's combined configuration params from config and defaults into a self.args
        dictionary-like object (meaning it implements collections.Mapping and you can use self.args.get('parameter')
        to access something).

        Further, your model implementations should accept optional inputs_hook and hiddens_hook (if applicable)
        to set your inputs and hidden representation in a modular fashion, allowing models to link together.
        inputs_hook is a tuple of (shape, variable) that should replace the default model inputs.
        hiddens_hook is a tuple of (shape, variable) that should replace the default model hidden representation
        (which means you need to adapt creating your computation graph to not care about the inputs and to instead
        run outputs directly from the hidden variable provided).
        You can also accept a params_hook to share model parameters rather than instantiate a new set of parameters.
        ------------------

        :param config: A dictionary-like object containing all the necessary user-defined parameters for the model.
        This means it either implements collections.Mapping or is a file path to a JSON or YAML configuration file.
        :type config: collections.Mapping object or String (.json file path or .yaml file path)

        :param defaults: A dictionary-like object containing all the necessary default parameters for the model.
        This means it either implements collections.Mapping or is a file path to a JSON or YAML configuration file.
        :type defaults: collections.Mapping object or String (.json file path or .yaml file path)

        :param inputs_hook: Routing information for the model to accept inputs from elsewhere. This is used for linking
        different models together (e.g. setting the Sigmoid model's input layer to the DAE's hidden layer gives a
        newly supervised classification model). For now, you need to include the shape information (normally the
        dimensionality of the input i.e. n_in).
        :type inputs_hook: Tuple of (shape, variable)

        :param hiddens_hook: Routing information for the model to accept its hidden representation from elsewhere.
        This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
        output layer gives the RNN-GSN model, a deep recurrent model.) For now, you need to include the shape
        information (normally the dimensionality of the hiddens i.e. n_hidden).
        :type hiddens_hook: Tuple of (shape, variable)

        :param params_hook: A list of model parameters (shared theano variables) that you should use when constructing
        this model (instead of initializing your own shared variables). This parameter is useful when you want to have
        two versions of the model that use the same parameters - such as a training model with dropout applied to layers
        and one without for testing, where the parameters are shared between the two.
        :type params_hook: List(theano shared variable)

        :param output_size: the dimensionality of the output for this model. This is required for stacking models
        automatically - where the input to one layer is the output of the previous layer. Currently, we cannot
        run the size from Theano's graph, so it needs to be explicit. This parameter can be None if it is specified
        in the default or config dictionaries.
        :type output_size: int

        :param outdir: the directory you want outputs (parameters, images, etc.) to save to.
        :type outdir: string

        :param kwargs: this will be all the other left-over parameters passed to the class as a dictionary of
        {param: value}. We will use the kwargs to finally combine defaults, config, and passed parameters together
        into the self.args dict, making each model's parameters accessible by name in self.args
        :type kwargs: dict
        """
        log.info("Creating a new instance of %s", str(type(self)))

        # set self.args to be the combination of the defaults and the config dictionaries
        self.args = combine_config_and_defaults(config, defaults)

        # if the args are none, make it a blank dictionary
        if self.args is None:
            self.args = {}

        # now, go through the inputs_hook, hiddens_hook, params_hook, and output_size to add them to self.args
        # if the variable isn't None, override the argument from config/default. (or add it if it doesn't exist)
        if inputs_hook is not None or 'inputs_hook' not in self.args:
            self.args['inputs_hook'] = inputs_hook

        if hiddens_hook is not None or 'hiddens_hook' not in self.args:
            self.args['hiddens_hook'] = hiddens_hook

        if params_hook is not None or 'params_hook' not in self.args:
            self.args['params_hook'] = params_hook

        if output_size is not None or 'output_size' not in self.args:
            self.args['output_size'] = output_size

        # set the overall default outdir to outputs/
        if outdir is not None or 'outdir' not in self.args:
            self.args['outdir'] = outdir
        if self.args['outdir'] is None:
            self.args['outdir'] = 'outputs/'

        # Now create the directory for outputs of the model
        # set up base path for the outputs of the model during training, etc.
        mkdir_p(self.args['outdir'])


        # now that our required variables are out of the way, do the same thing for everything else passed via kwargs
        for arg, val in kwargs.items():
            if (val is not None or str(arg) not in self.args) and str(arg) != 'kwargs':
                self.args[str(arg)] = val
            # flatten kwargs if it was passed as a variable
            elif str(arg) == 'kwargs':
                inner_kwargs = kwargs['kwargs']
                for key, item in inner_kwargs.items():
                    if item is not None or str(key) not in self.args:
                        self.args[str(key)] = item

        # Magic! Now self.args contains the combination of all the initialization variables, overridden like so:
        # defaults < config < kwargs (explicits passed to model's __init__)

        # Do a check if both input_size and inputs_hook are None (this should only happen in Prototype)
        if self.args.get("input_size") is None and self.args.get('inputs_hook') is None:
            log.warning("Both input_size and inputs_hook are None! Make sure this is only happening in a Prototype! "
                        "Setting input_size to 1 for convenience to the Prototype.")
            self.args['input_size'] = 1

        # Finally, to make things really easy, update the class 'self' with everything in self.args to make
        # all the parameters accessible via self.<param>
        self.__dict__.update(self.args)

        # log the arguments.
        log.debug("%s self.args: %s", str(type(self)), str(self.args))
        # save the arguments.
        self.save_args()
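The precedence the closing comment states (defaults < config < kwargs) can be made concrete with a hypothetical sketch; SomeModel and n_hidden are illustrative names:

# Hypothetical precedence sketch: explicit kwargs win over config, which wins over defaults.
model = SomeModel(defaults={'n_hidden': 500},
                  config={'n_hidden': 1000},
                  n_hidden=2000)
# model.args['n_hidden'] == 2000
# model.args['outdir']   == 'outputs/'   (the fallback applied above)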
Example #14
    def install(self):
        '''
        Method to both download and extract the dataset from the internet (if applicable) or verify that the file
        exists in the dataset_dir.
        '''
        file_type = None
        if self.filename is not None:
            log.info('Installing dataset %s', str(self.filename))
            # construct the actual path to the dataset
            prevdir = os.getcwd()
            os.chdir(os.path.split(os.path.realpath(__file__))[0])
            dataset_dir = os.path.realpath(self.dataset_dir)
            try:
                mkdir_p(dataset_dir)
                dataset_location = os.path.join(dataset_dir, self.filename)
            except Exception as e:
                log.error("Couldn't make the dataset path with directory %s and filename %s",
                          dataset_dir,
                          str(self.filename))
                log.exception("%s", str(e))
                dataset_location = None
            finally:
                os.chdir(prevdir)

            # check if the dataset is already in the source, otherwise download it.
            # first check if the base filename exists - without all the extensions.
            # then, add each extension on and keep checking until the upper level, when you download from http.
            if dataset_location is not None:
                (dirs, fname) = os.path.split(dataset_location)
                split_fname = fname.split('.')
                accumulated_name = split_fname[0]
                found = False
                # first check if the filename was a directory (like for the midi datasets)
                if os.path.exists(os.path.join(dirs, accumulated_name)):
                    found = True
                    file_type = get_file_type(os.path.join(dirs, accumulated_name))
                    dataset_location = os.path.join(dirs, accumulated_name)
                    log.debug('Found file %s', dataset_location)
                # now go through the file extensions starting with the lowest level and check if the file exists
                if not found and len(split_fname) > 1:
                    for chunk in split_fname[1:]:
                        accumulated_name = '.'.join((accumulated_name, chunk))
                        file_type = get_file_type(os.path.join(dirs, accumulated_name))
                        if file_type is not None:
                            dataset_location = os.path.join(dirs, accumulated_name)
                            log.debug('Found file %s', dataset_location)
                            break

            # if the file wasn't found, download it if a source was provided. Otherwise, raise error.
            download_success = True
            if self.source is not None:
                if file_type is None:
                    download_success = download_file(self.source, dataset_location)
                    file_type = get_file_type(dataset_location)
            else:
                log.error("Filename %s couldn't be found, and no URL source to download was provided.",
                          str(self.filename))
                raise RuntimeError("Filename %s couldn't be found, and no URL source to download was provided." %
                                   str(self.filename))

            # if the file type is a zip, unzip it.
            unzip_success = True
        if file_type == files.ZIP:
                (dirs, fname) = os.path.split(dataset_location)
                post_unzip = os.path.join(dirs, '.'.join(fname.split('.')[0:-1]))
                unzip_success = files.unzip(dataset_location, post_unzip)
                # if the unzip was successful
                if unzip_success:
                    # remove the zipfile and update the dataset location and file type
                    log.debug('Removing file %s', dataset_location)
                    os.remove(dataset_location)
                    dataset_location = post_unzip
                    file_type = get_file_type(dataset_location)
            if download_success and unzip_success:
                log.info('Installation complete. Yay!')
            else:
                log.warning('Something went wrong installing dataset. Boo :(')

            return dataset_location, file_type
Example #15
def config_root_logger(config_file='logging_config.json'):
    """
    Configures the root logger (returned from get_root_logger()) to the specifications in the JSON file `config_file`.

    Parameters
    ----------
    config_file : str
        The string path to the configuration JSON file to use.
    """
    # this could be called from scripts anywhere, but we want to keep the log-related items in this directory.
    # therefore, change the cwd to this file's directory and then change back at the end.
    prevdir = os.path.realpath(os.getcwd())
    os.chdir(os.path.split(os.path.realpath(__file__))[0])

    # load the basic parameters from the JSON configuration file
    # config_file = os.path.join(os.path.split(os.path.realpath(__file__))[0], config_file)

    path = config_file
    env_key = 'LOG_CFG'
    value = os.getenv(env_key, None)
    if value:
        path = value
    # if the configuration exists
    init = True
    if os.path.exists(path):
        with open(path, 'rt') as f:
            try:
                config = json.load(f)
            except Exception:
                logging.basicConfig(level=logging.DEBUG)
                logger = get_root_logger()
                logger.exception(
                    'Exception in reading the JSON logging config file!')
                logger.warning(
                    'Anyway, loading the basicConfig for the logger instead.')
                init = False

        if init:
            # make the file paths to the log files
            for handler in config.get('handlers', {}):
                if handler is not None:
                    path = config.get('handlers').get(handler).get('filename')
                    if path is not None:
                        path = os.path.normpath(path)
                        (dirs, _) = os.path.split(path)
                        if len(dirs) != 0:
                            # dirs = os.path.join(os.path.split(os.path.realpath(__file__))[0], dirs)
                            try:
                                mkdir_p(dirs)
                            except Exception:
                                logging.basicConfig(level=logging.DEBUG)
                                logger = get_root_logger()
                                logger.exception(
                                    'Exception in creating the directory for a logging handler! '
                                    'Path was {0!s}'.format(
                                        os.path.realpath(dirs)))
                                logger.warning(
                                    'Anyway, loading the basicConfig for the logger instead.'
                                )
                                init = False

            # load the configuration into the logging module
            if init:
                try:
                    logging.config.dictConfig(config)
                except Exception:
                    logging.basicConfig(level=logging.DEBUG)
                    logger = get_root_logger()
                    logger.exception(
                        'Exception in loading the JSON logging config file to the logging module!'
                    )
                    logger.warning(
                        'Anyway, loading the basicConfig for the logger instead.'
                    )

    # otherwise, couldn't find the configuration file
    else:
        logging.basicConfig(level=logging.DEBUG)
        logger = get_root_logger()
        logger.warning(
            "Could not find configuration file for logger! Was looking for {0!s}. "
            "Using basicConfig instead...".format(os.path.realpath(path)))

    # change the directory to the calling file's working directory
    os.chdir(prevdir)
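For reference, a minimal configuration of the shape this function expects, written as the dict that logging.config.dictConfig receives (a hypothetical sketch; the actual logging_config.json contents are not shown here):

# Hypothetical minimal config: a single file handler whose 'filename'
# directory is exactly what the mkdir_p() loop above pre-creates.
config = {
    'version': 1,
    'handlers': {
        'file': {
            'class': 'logging.FileHandler',
            'filename': 'logs/app.log',
            'level': 'DEBUG',
        },
    },
    'root': {'handlers': ['file'], 'level': 'DEBUG'},
}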
Example #16
    def install(self):
        '''
        Method to both download and extract the dataset from the internet (if applicable) or verify that the file
        exists in the dataset_dir.

        Returns
        -------
        str
            The absolute path to the dataset location on disk.
        int
            The integer representing the file type for the dataset, as defined in the opendeep.utils.file_ops module.
        '''
        file_type = None
        if self.filename is not None:
            log.info('Installing dataset %s', str(self.filename))
            # construct the actual path to the dataset
            prevdir = os.getcwd()
            os.chdir(os.path.split(os.path.realpath(__file__))[0])
            dataset_dir = os.path.realpath(self.dataset_dir)
            try:
                mkdir_p(dataset_dir)
                dataset_location = os.path.join(dataset_dir, self.filename)
            except Exception as e:
                log.error("Couldn't make the dataset path with directory %s and filename %s",
                          dataset_dir,
                          str(self.filename))
                log.exception("%s", str(e))
                dataset_location = None
            finally:
                os.chdir(prevdir)

            # check if the dataset is already in the source, otherwise download it.
            # first check if the base filename exists - without all the extensions.
            # then, add each extension on and keep checking until the upper level, when you download from http.
            if dataset_location is not None:
                (dirs, fname) = os.path.split(dataset_location)
                split_fname = fname.split('.')
                accumulated_name = split_fname[0]
                found = False
                # first check if the filename was a directory (like for the midi datasets)
                if os.path.exists(os.path.join(dirs, accumulated_name)):
                    found = True
                    file_type = get_file_type(os.path.join(dirs, accumulated_name))
                    dataset_location = os.path.join(dirs, accumulated_name)
                    log.debug('Found file %s', dataset_location)
                # now go through the file extensions starting with the lowest level and check if the file exists
                if not found and len(split_fname) > 1:
                    for chunk in split_fname[1:]:
                        accumulated_name = '.'.join((accumulated_name, chunk))
                        file_type = get_file_type(os.path.join(dirs, accumulated_name))
                        if file_type is not None:
                            dataset_location = os.path.join(dirs, accumulated_name)
                            log.debug('Found file %s', dataset_location)
                            break

            # if the file wasn't found, download it if a source was provided. Otherwise, raise error.
            download_success = True
            if self.source is not None:
                if file_type is None:
                    download_success = download_file(self.source, dataset_location)
                    file_type = get_file_type(dataset_location)
            else:
                log.error("Filename %s couldn't be found, and no URL source to download was provided.",
                          str(self.filename))
                raise RuntimeError("Filename %s couldn't be found, and no URL source to download was provided." %
                                   str(self.filename))

            # if the file type is a zip, unzip it.
            unzip_success = True
        if file_type == files.ZIP:
                (dirs, fname) = os.path.split(dataset_location)
                post_unzip = os.path.join(dirs, '.'.join(fname.split('.')[0:-1]))
                unzip_success = files.unzip(dataset_location, post_unzip)
                # if the unzip was successful
                if unzip_success:
                    # remove the zipfile and update the dataset location and file type
                    log.debug('Removing file %s', dataset_location)
                    os.remove(dataset_location)
                    dataset_location = post_unzip
                    file_type = get_file_type(dataset_location)
            if download_success and unzip_success:
                log.info('Installation complete. Yay!')
            else:
                log.warning('Something went wrong installing dataset. Boo :(')

            return dataset_location, file_type
Example #17
    def __init__(self,
                 inputs_hook=None,
                 hiddens_hook=None,
                 params_hook=None,
                 input_size=None,
                 output_size=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs_hook and hiddens_hook (if applicable)
        to set your inputs and hidden representation in a modular fashion, allowing models to link together.
        inputs_hook is a tuple of (shape, variable) that should replace the default model inputs.
        hiddens_hook is a tuple of (shape, variable) that should replace the default model hidden representation
        (which means you need to adapt creating your computation graph to not care about the inputs and to instead
        run outputs directly from the hidden variable provided).
        You can also accept a params_hook to share model parameters rather than instantiate a new set of parameters.

        Parameters
        ----------
        inputs_hook : Tuple of (shape, variable)
            Routing information for the model to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). For now, it needs to include the shape information (normally the
            dimensionality of the input i.e. n_in).
        hiddens_hook : Tuple of (shape, variable)
            Routing information for the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, it needs to include the shape
            information (normally the dimensionality of the hiddens i.e. n_hidden).
        params_hook : List(theano shared variable)
            A list of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as a training model with dropout applied
            to layers and one without for testing, where the parameters are shared between the two.
        input_size : int or shape tuple
            The dimensionality of the input for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer.
        output_size : int or shape tuple
            The dimensionality of the output for this model. This is required for stacking models
            automatically - where the input to one layer is the output of the previous layer. Currently, we cannot
            run the size from Theano's graph, so it needs to be explicit.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and output_size.
        """
        log.info("Creating a new instance of %s", str(type(self)))

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs_hook = inputs_hook
        self.hiddens_hook = hiddens_hook
        self.params_hook = params_hook
        self.input_size = input_size
        self.output_size = output_size
        self.outdir = outdir

        # make sure outdir ends in a directory separator
        if self.outdir and self.outdir[-1] != os.sep:
            self.outdir += os.sep

        # Combine arguments that could specify input_size -> overwrite input_size with inputs_hook[0] if it exists.
        if self.inputs_hook and self.inputs_hook[0] is not None:
            self.input_size = self.inputs_hook[0]

        # Check if the input_size wasn't provided - if this is the case, it could either be a programmer's error
        # or it could be during the automatic stacking in a Container. Since that is a common use case, set
        # the input_size to 1 to avoid errors when instantiating the model.
        if not self.input_size:
            # Could be error, or more commonly, when adding models to a Container
            log.warning(
                "No input_size or inputs_hook! Make sure this is done in a Container. Setting input_size"
                "=1 for the Container now...")
            self.input_size = 1

        # Also, check if no output_size was given - this could be the case for generative models. Copy input_size
        # in that case.
        if not self.output_size:
            # Could be an error (hopefully not), so give the warning.
            log.warning(
                "No output_size given! Make sure this is from a generative model (where output_size is the "
                "same as input_size). Setting output_size=input_size now...")
            self.output_size = self.input_size

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['output_size'] = self.output_size

        # Now create the directory for outputs of the model
        # set up base path for the outputs of the model during training, etc.
        self.args['outdir'] = self.outdir
        if self.args['outdir']:
            mkdir_p(self.args['outdir'])

        # log the arguments.
        log.info("%s self.args: %s", str(type(self)), str(self.args))
        # save the arguments.
        self.save_args()
Example #18
    def __init__(self,
                 inputs=None,
                 hiddens=None,
                 outputs=None,
                 params=None,
                 outdir=None,
                 **kwargs):
        """
        Initialize a new Model.

        Your model implementations should accept optional inputs and hiddens Theano symbolic expressions
        or variables (if applicable) to set your inputs and hidden representation in a modular fashion,
        allowing models to link together. `inputs` can have a tuple of (shape, variable) that should replace
        the default model inputs. hiddens can have a tuple of (shape, variable) that should replace the
        default model hidden representation (which means you need to adapt creating your computation graph
        to not care about the inputs and to instead run outputs directly from the hidden variable provided).
        You can also accept a params to share model parameters rather than instantiate a new set of parameters.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`) or Model] or None
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. This is used for linking
            different models together (e.g. setting the Softmax model's input layer to the DAE's hidden layer gives a
            newly supervised classification model). `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)]. If a :class:`Model` is given as the input, it replaces
            the tuple with zip(Model.output_size, Model.get_outputs()).
        hiddens : List of [tuple(shape, `Theano.TensorType`) or shape] or None, optional
            The dimensionality of the hidden representation for this model, and/or the routing information for
            the model to accept its hidden representation from elsewhere.
            This is used for linking different models together (e.g. setting the GSN model's hidden layers to the RNN's
            output layer gives the RNN-GSN model, a deep recurrent model.) For now, variable hook tuples need to
            include the shape information (normally the dimensionality of the hiddens i.e. n_hidden). This shape
            information is the same format as the monad for `inputs`.
        outputs : List of [int or shape tuple], optional
            The dimensionality of the output(s) for this model. Shape here is the shape monad described in `inputs`.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str, optional
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        kwargs : dict, optional
            This will be all the other left-over keyword parameters passed to the class as a
            dictionary of {param: value}. These get created into `self.args` along with outdir and outputs.
        """
        self._classname = self.__class__.__name__
        log.info("Creating a new instance of %s", self._classname)

        # Necessary inputs to a Model - these are the minimum requirements for modularity to work.
        self.inputs = raise_to_list(inputs)
        if self.inputs is not None:
            ins = []
            # deal with Models or ModifyLayers being passed as an input.
            for inp in self.inputs:  # `inp` avoids shadowing the builtin `input`
                if hasattr(inp, 'output_size') and hasattr(inp, 'get_outputs'):
                    sizes = raise_to_list(inp.output_size)
                    outs = raise_to_list(inp.get_outputs())
                    # broadcast a single known size across all of the outputs
                    if len(sizes) == 1 and len(sizes) < len(outs):
                        sizes = sizes * len(outs)
                    # zip() is lazy in Python 3, so extend with the pairs directly
                    # rather than wrapping the zip object via raise_to_list
                    ins.extend(zip(sizes, outs))
                else:
                    ins.append(inp)
            # replace self.inputs
            self.inputs = ins

        self.hiddens = raise_to_list(hiddens)
        self.output_size = raise_to_list(kwargs.get('output_size', outputs))
        self.params = params or {}
        self.outdir = outdir

        # make the directory to output configuration and parameters from the model
        if self.outdir:
            self.outdir = os.path.realpath(self.outdir)
            mkdir_p(self.outdir)

        # copy all of the parameters from the class into an args (configuration) dictionary
        self.args = {}
        self.args = add_kwargs_to_dict(kwargs.copy(), self.args)

        self.args['inputs'] = self.inputs
        self.args['hiddens'] = self.hiddens
        if self.output_size is not None:
            self.args['output_size'] = self.output_size
        self.args['params'] = self.params
        self.args['outdir'] = self.outdir

        # log the arguments.
        log.info("%s self.args: %s", self._classname, str(self.args))
        # save the arguments.
        self.save_args()
        # Boom! Hyperparameters are now dealt with. Take that!

        # Don't know the position of switches!
        self.switches_on = None
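
To illustrate the (shape, variable) hook format and the model-to-model routing described in the
docstring, here is a hedged sketch; DAE and Softmax are hypothetical Model subclasses, and only
the constructor keywords documented above are assumed:

import theano.tensor as T

# Explicit (shape, variable) hook: unknown batch size, fixed feature size of 784.
x = T.matrix('x')
dae = DAE(inputs=[((None, 784), x)], hiddens=1000, outdir=None)

# Passing the model itself makes the loop above substitute
# zip(dae.output_size, dae.get_outputs()) for the tuple form,
# stacking a classifier on the DAE's outputs.
classifier = Softmax(inputs=[dae], outputs=10, outdir='outputs/classifier')

# Sharing `params` (e.g. for a siamese setup) reuses dae's shared variables
# instead of initializing new ones.
x2 = T.matrix('x2')
twin = DAE(inputs=[((None, 784), x2)], hiddens=1000, params=dae.params, outdir=None)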
Example No. 19
def config_root_logger(config_file='logging_config.json'):
    """
    Configures the root logger (returned from get_root_logger()) to the specifications in the JSON file `config_file`.

    Parameters
    ----------
    config_file : str
        The string path to the configuration JSON file to use.
    """
    # this could be called from scripts anywhere, but we want to keep the log-related items in this directory.
    # therefore, change the cwd to this file's directory and then change back at the end.
    prevdir = os.path.realpath(os.getcwd())
    os.chdir(os.path.split(os.path.realpath(__file__))[0])

    # load the basic parameters from the JSON configuration file
    # config_file = os.path.join(os.path.split(os.path.realpath(__file__))[0], config_file)

    path = config_file
    env_key = 'LOG_CFG'
    value = os.getenv(env_key, None)
    if value:
        path = value
    # if the configuration exists
    init = True
    if os.path.exists(path):
        with open(path, 'rt') as f:
            try:
                config = json.load(f)
            # json.load raises ValueError (JSONDecodeError) on malformed JSON
            except ValueError:
                logging.basicConfig(level=logging.DEBUG)
                logger = get_root_logger()
                logger.exception('Exception in reading the JSON logging config file!')
                logger.warning('Anyway, loading the basicConfig for the logger instead.')
                init = False

        if init:
            # make sure the directory for each handler's log file exists
            for handler_name in config.get('handlers', {}):
                path = config['handlers'][handler_name].get('filename')
                if path is not None:
                    path = os.path.normpath(path)
                    (dirs, _) = os.path.split(path)
                    if dirs:
                        try:
                            mkdir_p(dirs)
                        except OSError:
                            logging.basicConfig(level=logging.DEBUG)
                            logger = get_root_logger()
                            logger.exception('Exception in creating the directory for a logging handler! '
                                             'Path was {0!s}'.format(os.path.realpath(dirs)))
                            logger.warning('Anyway, loading the basicConfig for the logger instead.')
                            init = False

            # load the configuration into the logging module
            if init:
                try:
                    logging.config.dictConfig(config)
                # per the logging docs, dictConfig raises ValueError/TypeError/AttributeError/ImportError
                except Exception:
                    logging.basicConfig(level=logging.DEBUG)
                    logger = get_root_logger()
                    logger.exception('Exception in loading the JSON logging config file to the logging module!')
                    logger.warning('Anyway, loading the basicConfig for the logger instead.')

    # otherwise, couldn't find the configuration file
    else:
        logging.basicConfig(level=logging.DEBUG)
        logger = get_root_logger()
        logger.warning("Could not find configuration file for logger! Was looking for {0!s}. "
                       "Using basicConfig instead...".format(os.path.realpath(path)))

    # change back to the previous working directory
    os.chdir(prevdir)
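
As a usage sketch, the following builds a minimal dict in the layout logging.config.dictConfig
expects (the handler name and log path are illustrative), writes it to disk, and points the
LOG_CFG override at it. An absolute path is safest because the function chdirs to its own
module directory while loading:

import json
import os

config = {
    "version": 1,
    "handlers": {
        "file": {
            "class": "logging.FileHandler",
            "filename": "logs/example.log",  # illustrative; the directory is created above via mkdir_p
            "level": "DEBUG",
        },
    },
    "root": {"handlers": ["file"], "level": "DEBUG"},
}
with open('logging_config.json', 'w') as f:
    json.dump(config, f)

# LOG_CFG, when set, overrides the config_file argument (see env_key above).
os.environ['LOG_CFG'] = os.path.abspath('logging_config.json')
config_root_logger()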