예제 #1
0
def _pairs_file_to_images(path, image_size):
    """Read a CEdata pairs CSV and rasterise each (A, B) pair to an image.

    path       : pairs CSV with a header line; fields are id, A-values, B-values
                 (values space-separated within a field).
    image_size : side length of the square image per pair.
    Returns an array of shape (n_pairs, image_size ** 2), one image per row.
    """
    with open(path, 'r') as pairs_data_file:
        pairs_data_file.readline()  # discard header
        pairs_body = pairs_data_file.readlines()
    images = np.zeros([len(pairs_body), image_size ** 2])
    prog = Progress(len(pairs_body))
    for (i, line) in enumerate(pairs_body):
        # Parse the line once rather than re-splitting it per field.
        fields = line.strip().split(',')
        A = np.array([float(a) for a in fields[1].strip().split(' ')])
        B = np.array([float(b) for b in fields[2].strip().split(' ')])
        images[i, :] = pairs_to_image(A, B, image_size)
        prog.tick()
    prog.done()
    return images


def main(image_size=10):
    """Convert training and validation pairs to images and save both sets
    to a single MATLAB .mat file (keys 'train_images' / 'valid_images')."""
    print('Loading pairs data and converting to images')
    Inps = _pairs_file_to_images('../../data/training-flipped/CEdata_train_pairs.csv', image_size)

    print('Loading validation data and converting to images')
    validInps = _pairs_file_to_images('../../data/validation/CEfinal_valid_pairs.csv', image_size)

    print('Saving data to MATLAB format')

    scipy.io.savemat('images_10_pit.mat', {'train_images' : Inps, 'valid_images' : validInps})
예제 #2
0
파일: dbn.py 프로젝트: evolu8/gdbn
 def fineTune(self, minibatchStream, epochs, mbPerEpoch, loss = None, progressBar = True, useDropout = False):
     """Generator: fine-tune for `epochs` epochs of `mbPerEpoch` minibatches.

     loss, if given, is an extra metric accumulated alongside the training
     error.  Yields (mean error per case, mean loss per case) after each
     epoch; the loss term is 0 when no loss function is supplied.
     """
     for ep in range(epochs):
         totalCases = 0
         sumErr = 0
         sumLoss = 0
         # Nesterov momentum uses a dedicated update step.
         if self.nesterov:
             step = self.stepNesterov
         else:
             step = self.step
         # Max-norm constraints and noise only kick in after epoch 6.
         # These depend only on `ep`, so compute them once per epoch
         # instead of once per minibatch.
         usemaxNorm = ep > 6
         usenoises = ep > 6
         prog = Progress(mbPerEpoch) if progressBar else DummyProgBar()
         for i in range(mbPerEpoch):
             inpMB, targMB = minibatchStream.next()
             err, outMB = step(inpMB, targMB, self.learnRates, self.momentum, self.L2Costs, useDropout, usemaxNorm, usenoises)
             sumErr += err
             if loss is not None:  # idiomatic identity test (was `!= None`)
                 sumLoss += loss(targMB, outMB)
             totalCases += inpMB.shape[0]
             prog.tick()
         prog.done()
         yield sumErr/float(totalCases), sumLoss/float(totalCases)
예제 #3
0
def read_valid_pairs():
    """Load the validation pairs CSV into a dict keyed by sample id.

    Each value is an (A, B) tuple of numpy arrays holding the two
    variables of the pair.  The CSV header line is skipped.
    """
    valid_path = get_paths()["valid_pairs_path"]
    with open(valid_path, 'r') as pairs_data_file:
        pairs_data_file.readline()  # discard header
        pairs_body = pairs_data_file.readlines()
    pairs = {}
    prog = Progress(len(pairs_body))
    for line in pairs_body:
        # Split the line once instead of re-parsing it for every field.
        fields = line.strip().split(',')
        A = np.array([float(a) for a in fields[1].strip().split(' ')])
        B = np.array([float(b) for b in fields[2].strip().split(' ')])
        pairs[fields[0]] = (A, B)
        prog.tick()
    prog.done()
    return pairs
예제 #4
0
def apply_features(data, features):
    """Evaluate every feature on every (A, B) pair in `data`.

    data     : dict mapping key -> (A, B) tuple of numpy arrays
    features : list of (name, variable, f) triples; `variable` selects
               what f is applied to: 'A', 'B', 'derived' (f is a python
               expression string), anything else means f(A, B).
    Returns a dict mapping each key to a numpy vector, one entry per feature.
    """
    #### TODO - can I be made more efficient?
    prog = Progress(len(data))
    output = {key : np.zeros(len(features)) for key in data.keys()}
    for (key, (A, B)) in data.iteritems():
        for (j, (name, variable, f)) in enumerate(features):
            if variable == 'A':
                value = f(A)
            elif variable == 'B':
                value = f(B)
            elif variable == 'derived':
                # NOTE(review): 'derived' features are expression strings
                # evaluated with A and B in local scope via eval - only
                # safe if the feature definitions are trusted.
                value = eval(f)
            else:
                value = f(A, B)
            output[key][j] = value
        prog.tick()
    prog.done()
    return output
예제 #5
0
 def fineTune(self,
              minibatchStream,
              epochs,
              mbPerEpoch,
              loss=None,
              progressBar=True,
              useDropout=False):
     """Generator: fine-tune for `epochs` epochs of `mbPerEpoch` minibatches.

     loss, if given, is an extra metric accumulated alongside the training
     error.  Yields (mean error per case, mean loss per case) after each
     epoch; the loss term is 0 when no loss function is supplied.
     """
     for ep in range(epochs):
         totalCases = 0
         sumErr = 0
         sumLoss = 0
         # Nesterov momentum uses a dedicated update step.
         if self.nesterov:
             step = self.stepNesterov
         else:
             step = self.step
         prog = Progress(mbPerEpoch) if progressBar else DummyProgBar()
         for i in range(mbPerEpoch):
             # A masked linear output layer needs the target mask threaded
             # through to the step function.
             if isinstance(self.outputActFunct, LinearMasked):
                 inpMB, targMB, targMaskMB = minibatchStream.next()
                 err, outMB = step(inpMB, targMB, self.learnRates,
                                   self.momentum, self.L2Costs, useDropout,
                                   targMaskMB)
             else:
                 inpMB, targMB = minibatchStream.next()
                 err, outMB = step(inpMB, targMB, self.learnRates,
                                   self.momentum, self.L2Costs, useDropout)
             sumErr += err
             if loss is not None:  # idiomatic identity test (was `!= None`)
                 sumLoss += loss(targMB, outMB)
             totalCases += inpMB.shape[0]
             prog.tick()
         prog.done()
         yield sumErr / float(totalCases), sumLoss / float(totalCases)
예제 #6
0
파일: dbn.py 프로젝트: evoup/cause-effect
 def fineTune(self, minibatchStream, epochs, mbPerEpoch, loss = None, progressBar = True, useDropout = False):
     """Generator: fine-tune for `epochs` epochs of `mbPerEpoch` minibatches.

     loss, if given, is an extra metric accumulated alongside the training
     error.  Yields (mean error per case, mean loss per case) after each
     epoch; the loss term is 0 when no loss function is supplied.
     """
     for ep in range(epochs):
         totalCases = 0
         sumErr = 0
         sumLoss = 0
         #### TODO - What is nesterov?
         # Nesterov momentum uses a dedicated update step.
         if self.nesterov:
             step = self.stepNesterov
         else:
             step = self.step
         prog = Progress(mbPerEpoch) if progressBar else DummyProgBar()
         for i in range(mbPerEpoch):
             inpMB, targMB = minibatchStream.next()
             #### TODO - different version of step for when using droupout or not
             err, outMB = step(inpMB, targMB, self.learnRates, self.momentum, self.L2Costs, useDropout)
             sumErr += err
             if loss is not None:  # idiomatic identity test (was `!= None`)
                 sumLoss += loss(targMB, outMB)
             totalCases += inpMB.shape[0]
             prog.tick()
         prog.done()
         yield sumErr/float(totalCases), sumLoss/float(totalCases)
예제 #7
0
def reverse_it(overwrite=False):
    """Augment the training set with value-reversed copies of every pair.

    For each original pair (A, B), three extra samples are written:
    (reversed A, B), (A, reversed B) and (reversed A, reversed B), where
    'reversing' mirrors a variable about its mean (or flips 0/1 for
    binary variables).  Reversing values does not change the causal
    direction, so targets and public info are copied unchanged, just
    renumbered.  Output goes to training-reversed/.
    """
    
    if (not overwrite) and os.path.exists('training-reversed/CEdata_train_pairs.csv') :
        print 'Output already exists - not overwriting'
        return
        
    # Open info
    with open('training/CEdata_train_publicinfo.csv', 'r') as info_data_file:
        info_header = info_data_file.readline()
        info_body = info_data_file.readlines()
    # Reverse it (no change) - append three renumbered copies of each info
    # line, one per reversed variant written below.
    original_length = len(info_body)
    for i in range(original_length):
        info_body.append(','.join(['train%d' % (len(info_body)+1)] + info_body[i].split(',')[1:]))
        info_body.append(','.join(['train%d' % (len(info_body)+1)] + info_body[i].split(',')[1:]))
        info_body.append(','.join(['train%d' % (len(info_body)+1)] + info_body[i].split(',')[1:]))
        
    # Open targets
    with open('training/CEdata_train_target.csv', 'r') as target_data_file:
        target_header = target_data_file.readline()
        target_body = target_data_file.readlines()
    # Reverse it - no change to target values, again three renumbered copies
    original_length = len(target_body)
    for i in range(original_length):
        target_body.append(','.join(['train%d' % (len(target_body)+1)] + target_body[i].split(',')[1:]))
        target_body.append(','.join(['train%d' % (len(target_body)+1)] + target_body[i].split(',')[1:]))
        target_body.append(','.join(['train%d' % (len(target_body)+1)] + target_body[i].split(',')[1:]))
        
    # Open pairs
    with open('training/CEdata_train_pairs.csv', 'r') as pairs_data_file:
        pairs_header = pairs_data_file.readline()
        # Write reversed lines to temporary file
        with open('temp.csv', 'w') as temp_file:
            temp_file.write(pairs_header)
            # 5N ticks total: N while writing temp.csv, then 4N while
            # concatenating (N original + 3N reversed lines).
            prog = Progress(original_length * 5)
            for line in pairs_data_file:
                A = np.array([float(a) for a in line.strip().split(',')[1].strip().split(' ')])
                B = np.array([float(b) for b in line.strip().split(',')[2].strip().split(' ')])
                # Binary variables get flipped; others mirrored about their mean.
                if set(A) == set([0, 1]):
                    A_reversed = 1 - A
                else:
                    A_reversed = 2 * np.mean(A) - A
                if set(B) == set([0, 1]):
                    B_reversed = 1 - B
                else:
                    B_reversed = 2 * np.mean(B) - B
                temp_file.write(','.join(['dummy-id'] + [' '.join(str(a) for a in A_reversed)] + [' '.join(str(b) for b in B)]) + '\n')
                temp_file.write(','.join(['dummy-id'] + [' '.join(str(a) for a in A)] + [' '.join(str(b) for b in B_reversed)]) + '\n')
                temp_file.write(','.join(['dummy-id'] + [' '.join(str(a) for a in A_reversed)] + [' '.join(str(b) for b in B_reversed)]) + '\n')
                prog.tick()
                
    # Concatenate original pairs and temporary file 
    with open('training-reversed/CEdata_train_pairs.csv', 'w') as pairs_data_file:
        pairs_data_file.write(pairs_header)
        i = 1
        for file_name in ['training/CEdata_train_pairs.csv', 'temp.csv']:
            with open(file_name, 'r') as input_file:
                input_file.readline()  # skip header (already written once above)
                for line in input_file:
                    pairs_data_file.write(','.join(['train%d' % i] + line.split(',')[1:]))
                    prog.tick()
                    i += 1
    prog.done()    
    os.remove('temp.csv') 
    # Save other files
        
    with open('training-reversed/CEdata_train_target.csv', 'w') as target_data_file:
        target_data_file.write(target_header + ''.join(target_body))
        
    with open('training-reversed/CEdata_train_publicinfo.csv', 'w') as info_data_file:
        info_data_file.write(info_header + ''.join(info_body))
예제 #8
0
def flip_it(overwrite=False):
    
    if (not overwrite) and os.path.exists('training-flipped/CEdata_train_pairs.csv') :
        print 'Output already exists - not overwriting'
        return
        
    # Open info
    with open('training/CEdata_train_publicinfo.csv', 'r') as info_data_file:
        info_header = info_data_file.readline()
        info_body = info_data_file.readlines()
    # Flip it
    original_length = len(info_body)
    prog = Progress(original_length)
    for i in range(original_length):
        info_body.append(','.join(['train%d' % (len(info_body)+1)] + list(reversed(info_body[i].strip().split(',')[1:]))) + '\n')
        prog.tick()
    prog.done()
        
    # Open targets
    with open('training/CEdata_train_target.csv', 'r') as target_data_file:
        target_header = target_data_file.readline()
        target_body = target_data_file.readlines()
    # Flip it
    original_length = len(target_body)
    prog = Progress(original_length)
    for i in range(original_length):
        targets = target_body[i].split(',')[1:]
        if targets[0] == '1':
            targets[0] = '-1'
        elif targets[0] == '-1':
            targets[0] = '1'
        if targets[1] == '1\n':
            targets[1] = '2\n'
        elif targets[1] == '2\n':
            targets[1] = '1\n'
        target_body.append(','.join(['train%d' % (len(target_body)+1)] + targets))
        prog.tick()
    prog.done()
    
    # Open pairs
    with open('training/CEdata_train_pairs.csv', 'r') as pairs_data_file:
        pairs_header = pairs_data_file.readline()
        with open('temp.csv', 'w') as temp_file:
            temp_file.write(pairs_header)
            prog = Progress(original_length * 3)
            # Save flipped lines to temporary file
            for line in pairs_data_file:
                temp_file.write(','.join(['dummy-id'] + list(reversed(line.strip().split(',')[1:]))) + '\n')
                prog.tick()
        
    # Concatenate original pairs and temporary file 
    with open('training-flipped/CEdata_train_pairs.csv', 'w') as pairs_data_file:
        pairs_data_file.write(pairs_header)
        i = 1
        for file_name in ['training/CEdata_train_pairs.csv', 'temp.csv']:
            with open(file_name, 'r') as input_file:
                input_file.readline()
                for line in input_file:
                    pairs_data_file.write(','.join(['train%d' % i] + line.split(',')[1:]))
                    prog.tick()
                    i += 1

    # Save other files
        
    with open('training-flipped/CEdata_train_target.csv', 'w') as target_data_file:
        target_data_file.write(target_header + ''.join(target_body))
        
    with open('training-flipped/CEdata_train_publicinfo.csv', 'w') as info_data_file:
        info_data_file.write(info_header + ''.join(info_body))
예제 #9
0
def run_batch_locally(scripts, language='python', paths=[], max_cpu=0.9, max_mem=0.9, submit_sleep=1, job_check_sleep=30, \
                      verbose=True, max_files_open=100, max_running_jobs=10, single_thread=True):
    '''
    Receives a list of python scripts to run

    Assumes the code has an output file that will be managed by this function
    
    Returns a list of local file names where the code has presumably stored output
    '''
    # Define some code constants
    #### Do we need to set paths explicitly?

    #### This will be deprecated in future MATLAB - hopefully the -singleCompThread command is sufficient
    matlab_single_thread = '''
maxNumCompThreads(1);
'''

    python_path_code = '''
import sys
sys.path.append('%s')
'''

    matlab_path_code = '''
addpath(genpath('%s'))
'''
       
    python_completion_code = '''
print 'Writing completion flag'
with open('%(flag_file)s', 'w') as f:
    f.write('Goodbye, World')
print "Goodbye, World"
quit()
'''
  
    #### TODO - Is this completely stable       
    matlab_completion_code = '''
fprintf('\\nWriting completion flag\\n');
ID = fopen('%(flag_file)s', 'w');
fprintf(ID, 'Goodbye, world');
fclose(ID);
fprintf('\\nGoodbye, World\\n');
quit()
'''
    
    # Initialise lists of file locations job ids
    shell_files = [None] * len(scripts)
    script_files = [None] * len(scripts)
    output_files = [None] * len(scripts)
    stdout_files = [None] * len(scripts)
    stdout_file_handles = [None] * len(scripts)
    flag_files = [None] * len(scripts)
    processes = [None] * len(scripts)
    fear_finished = False
    job_finished = [False] * len(scripts)  
    
    files_open = 0

    # Loop through jobs, submitting jobs whenever CPU usage low enough, re-submitting failed jobs
    if not verbose:
        prog = Progress(len(scripts))
    while not fear_finished:
        should_sleep = True
        for (i, code) in enumerate(scripts):
            if (not job_finished[i]) and (processes[i] is None) and (files_open <= max_files_open) and (len([1 for p in processes if not p is None]) < max_running_jobs):
                # This script has not been run - check CPU and potentially run
                #### FIXME - Merge if statements
                if (psutil.cpu_percent() < max_cpu * 100) and (psutil.virtual_memory().percent < max_mem * 100):
                    # Jobs can run
                    should_sleep = False
                    # Get the job ready
                    if LOCATION == 'local':
                        temp_dir = LOCAL_TEMP_PATH
                    else:
                        temp_dir = HOME_TEMP_PATH
                    if language == 'python':
                        script_files[i] = (mkstemp_safe(temp_dir, '.py'))
                    elif language == 'matlab':
                        script_files[i] = (mkstemp_safe(temp_dir, '.m'))
                    # Create necessary files in local path
                    shell_files[i] = (mkstemp_safe(temp_dir, '.sh'))
                    output_files[i] = (mkstemp_safe(temp_dir, '.out'))
                    stdout_files[i] = (mkstemp_safe(temp_dir, '.o'))
                    flag_files[i] = (mkstemp_safe(temp_dir, '.flg'))
                    # Customise code
                    #### TODO - make path and output_transfer optional
                    if language == 'python':
                        code = code + python_completion_code
                        for path in paths:
                            code = (python_path_code % path) + code
                    elif language == 'matlab':
                        code = code + matlab_completion_code
                        for path in paths:
                            code = (matlab_path_code % path) + code
                    code = code % {'output_file': output_files[i],
                                   'flag_file' : flag_files[i]}
                    # Write code and shell file
                    with open(script_files[i], 'w') as f:
                        f.write(code)
                    with open(shell_files[i], 'w') as f:
                        #### TODO - is os.path.join always correct - what happens if this program is being run on windows?
                        if language == 'python':
                            f.write('python ' + script_files[i] + '\n')
                        elif language == 'matlab':
                            if LOCATION == 'home':
                                matlab_path = HOME_MATLAB
                            else:
                                matlab_path = LOCAL_MATLAB
                            if single_thread:
                                f.write('cd ' + os.path.split(script_files[i])[0] + ';\n' + matlab_path + ' -nosplash -nojvm -nodisplay -singleCompThread -r ' + \
                                        os.path.split(script_files[i])[-1].split('.')[0] + '\n')
                            else:
                                f.write('cd ' + os.path.split(script_files[i])[0] + ';\n' + matlab_path + ' -nosplash -nojvm -nodisplay -r ' + \
                                        os.path.split(script_files[i])[-1].split('.')[0] + '\n')
                    # Start running the job
                    if verbose:
                        print 'Submitting job %d of %d' % (i + 1, len(scripts))
                    stdout_file_handles[i] = open(stdout_files[i], 'w')
                    files_open = files_open + 1
                    processes[i] = subprocess.Popen(['sh', shell_files[i]], stdout = stdout_file_handles[i]);
                    # Sleep for a bit so the process can kick in (prevents 100s of jobs being sent to processor)
                    time.sleep(submit_sleep)
            elif (not job_finished[i]) and (not processes[i] is None):
                # Ask the process how its doing
                processes[i].poll()
                # Check to see if the process has completed
                if not processes[i].returncode is None:
                    if os.path.isfile(flag_files[i]):
                        job_finished[i] = True
                        if verbose:
                            print 'Job %d of %d has completed' % (i + 1, len(scripts))
                        else:
                            prog.tick()
                    else:
                        if verbose:
                            print 'Job %d has failed - will try again later' % i + 1
                        processes[i] = None
                    # Tidy up temp files
                    os.remove(script_files[i])
                    os.remove(shell_files[i])
                    stdout_file_handles[i].close()
                    files_open = files_open - 1
                    os.remove(stdout_files[i])
                    os.remove(flag_files[i])
                    processes[i] = None
                    # Something useful happened
                    should_sleep = False
        if all(job_finished):
            fear_finished = True 
            if not verbose: 
                prog.done()  
        elif should_sleep:
            # Count how many jobs are queued
            n_queued = 0
            # Count how many jobs are running
            n_running = 0
            if verbose:
                # print '%d jobs running' % n_running
                # print '%d jobs queued' % n_queued
                print 'Sleeping for %d seconds' % job_check_sleep
            time.sleep(job_check_sleep)

    #### TODO - return job output and error files as applicable (e.g. there may be multiple error files associated with one script)
    return output_files
예제 #10
0
    def fineTune(self,
                 minibatchStream,
                 trainInps,
                 epochs,
                 mbPerEpoch,
                 loss=None,
                 validSet=False,
                 progressBar=True,
                 useDropout=False):
        """Generator: fine-tune with neighborhood (manifold) information.

        minibatchStream yields (inpMB, targMB, mbgraph) triples; mbgraph.indx
        holds 1-based (MATLAB-style) indices into trainInps selecting each
        pivot's neighbors, and mbgraph.vals the corresponding graph weights.
        After each epoch the learning rates are scaled by
        self.learnRatesMultiplier.  Yields mean training error per case,
        plus per-sample validation error when validSet is given.
        """
        for ep in xrange(epochs):
            print
            print 'learnRates:', self.learnRates
            totalCases = 0
            sumErr = 0
            sumLoss = 0
            # Nesterov momentum uses a dedicated update step.
            if self.nesterov:
                step = self.stepNesterov
            else:
                step = self.step
            prog = Progress(mbPerEpoch) if progressBar else DummyProgBar()
            for i in range(mbPerEpoch):
                # print 'Epoch:', ep, 'minibatch', i

                (inpMB, targMB, mbgraph) = minibatchStream.next()
                if len(targMB.shape
                       ) != 3:  # Convert to a cubic matrix (3d matrix)
                    targMB = targMB.reshape(-1, 1, targMB.shape[1])

                # Each dimensions of inpMB (3d), refers to a pivot vector. Now, we want to select
                # training samples that falls in the neighborhood of this guy, and store in the
                # corresponding dimension of xsl (x_selected).

                xsl = np.zeros((mbgraph.indx.shape[0], mbgraph.indx.shape[1],
                                trainInps.shape[1]))
                for j in xrange(mbgraph.indx.shape[0]):
                    xsl[j] = trainInps[mbgraph.indx[j] - 1]
                    # -1 because I need to covert the indices from matlab format to python

                #distribute graph.vals to 3d
                vals_select = mbgraph.vals  #It has been converted to 3d inside manifold.py
                # Free the graph early - xsl/vals_select hold what is needed.
                del mbgraph

                err = step(xsl, vals_select, inpMB, targMB, self.learnRates,
                           self.momentum, self.L2Costs, useDropout)
                # gnp.free_reuse_cache()

                sumErr += err
                # print err, sumErr
                totalCases += inpMB.shape[0]
                prog.tick()
            prog.done()
            # Decay (or grow) the learning rates once per epoch.
            self.learnRates = [
                y * self.learnRatesMultiplier for y in self.learnRates
            ]
            # If validation set is given
            if validSet:
                val_outputActs = self.fprop_xf(validSet['trainInps'])
                val_error = self.outputActFunct.error(
                    gnp.garray(validSet['trainTargs']), self.state[-1],
                    val_outputActs)
                yield sumErr / float(
                    totalCases), val_error / validSet['trainInps'].shape[0]
            else:
                yield sumErr / float(totalCases)
예제 #11
0
def run_batch_locally(scripts, language='python', paths=[], max_cpu=0.9, max_mem=0.9, submit_sleep=1, job_check_sleep=30, \
                      verbose=True, max_files_open=100, max_running_jobs=10, single_thread=True):
    '''
    Receives a list of python scripts to run

    Assumes the code has an output file that will be managed by this function
    
    Returns a list of local file names where the code has presumably stored output
    '''
    # Define some code constants
    #### Do we need to set paths explicitly?

    #### This will be deprecated in future MATLAB - hopefully the -singleCompThread command is sufficient
    matlab_single_thread = '''
maxNumCompThreads(1);
'''

    python_path_code = '''
import sys
sys.path.append('%s')
'''

    matlab_path_code = '''
addpath(genpath('%s'))
'''

    python_completion_code = '''
print 'Writing completion flag'
with open('%(flag_file)s', 'w') as f:
    f.write('Goodbye, World')
print "Goodbye, World"
quit()
'''

    #### TODO - Is this completely stable
    matlab_completion_code = '''
fprintf('\\nWriting completion flag\\n');
ID = fopen('%(flag_file)s', 'w');
fprintf(ID, 'Goodbye, world');
fclose(ID);
fprintf('\\nGoodbye, World\\n');
quit()
'''

    # Initialise lists of file locations job ids
    shell_files = [None] * len(scripts)
    script_files = [None] * len(scripts)
    output_files = [None] * len(scripts)
    stdout_files = [None] * len(scripts)
    stdout_file_handles = [None] * len(scripts)
    flag_files = [None] * len(scripts)
    processes = [None] * len(scripts)
    fear_finished = False
    job_finished = [False] * len(scripts)

    files_open = 0

    # Loop through jobs, submitting jobs whenever CPU usage low enough, re-submitting failed jobs
    if not verbose:
        prog = Progress(len(scripts))
    while not fear_finished:
        should_sleep = True
        for (i, code) in enumerate(scripts):
            if (not job_finished[i]) and (processes[i] is None) and (
                    files_open <= max_files_open) and (len([
                        1 for p in processes if not p is None
                    ]) < max_running_jobs):
                # This script has not been run - check CPU and potentially run
                #### FIXME - Merge if statements
                if (psutil.cpu_percent() < max_cpu * 100) and (
                        psutil.virtual_memory().percent < max_mem * 100):
                    # Jobs can run
                    should_sleep = False
                    # Get the job ready
                    if LOCATION == 'local':
                        temp_dir = LOCAL_TEMP_PATH
                    else:
                        temp_dir = HOME_TEMP_PATH
                    if language == 'python':
                        script_files[i] = (mkstemp_safe(temp_dir, '.py'))
                    elif language == 'matlab':
                        script_files[i] = (mkstemp_safe(temp_dir, '.m'))
                    # Create necessary files in local path
                    shell_files[i] = (mkstemp_safe(temp_dir, '.sh'))
                    output_files[i] = (mkstemp_safe(temp_dir, '.out'))
                    stdout_files[i] = (mkstemp_safe(temp_dir, '.o'))
                    flag_files[i] = (mkstemp_safe(temp_dir, '.flg'))
                    # Customise code
                    #### TODO - make path and output_transfer optional
                    if language == 'python':
                        code = code + python_completion_code
                        for path in paths:
                            code = (python_path_code % path) + code
                    elif language == 'matlab':
                        code = code + matlab_completion_code
                        for path in paths:
                            code = (matlab_path_code % path) + code
                    code = code % {
                        'output_file': output_files[i],
                        'flag_file': flag_files[i]
                    }
                    # Write code and shell file
                    with open(script_files[i], 'w') as f:
                        f.write(code)
                    with open(shell_files[i], 'w') as f:
                        #### TODO - is os.path.join always correct - what happens if this program is being run on windows?
                        if language == 'python':
                            f.write('python ' + script_files[i] + '\n')
                        elif language == 'matlab':
                            if LOCATION == 'home':
                                matlab_path = HOME_MATLAB
                            else:
                                matlab_path = LOCAL_MATLAB
                            if single_thread:
                                f.write('cd ' + os.path.split(script_files[i])[0] + ';\n' + matlab_path + ' -nosplash -nojvm -nodisplay -singleCompThread -r ' + \
                                        os.path.split(script_files[i])[-1].split('.')[0] + '\n')
                            else:
                                f.write('cd ' + os.path.split(script_files[i])[0] + ';\n' + matlab_path + ' -nosplash -nojvm -nodisplay -r ' + \
                                        os.path.split(script_files[i])[-1].split('.')[0] + '\n')
                    # Start running the job
                    if verbose:
                        print 'Submitting job %d of %d' % (i + 1, len(scripts))
                    stdout_file_handles[i] = open(stdout_files[i], 'w')
                    files_open = files_open + 1
                    processes[i] = subprocess.Popen(
                        ['sh', shell_files[i]], stdout=stdout_file_handles[i])
                    # Sleep for a bit so the process can kick in (prevents 100s of jobs being sent to processor)
                    time.sleep(submit_sleep)
            elif (not job_finished[i]) and (not processes[i] is None):
                # Ask the process how its doing
                processes[i].poll()
                # Check to see if the process has completed
                if not processes[i].returncode is None:
                    if os.path.isfile(flag_files[i]):
                        job_finished[i] = True
                        if verbose:
                            print 'Job %d of %d has completed' % (i + 1,
                                                                  len(scripts))
                        else:
                            prog.tick()
                    else:
                        if verbose:
                            print 'Job %d has failed - will try again later' % i + 1
                        processes[i] = None
                    # Tidy up temp files
                    os.remove(script_files[i])
                    os.remove(shell_files[i])
                    stdout_file_handles[i].close()
                    files_open = files_open - 1
                    os.remove(stdout_files[i])
                    os.remove(flag_files[i])
                    processes[i] = None
                    # Something useful happened
                    should_sleep = False
        if all(job_finished):
            fear_finished = True
            if not verbose:
                prog.done()
        elif should_sleep:
            # Count how many jobs are queued
            n_queued = 0
            # Count how many jobs are running
            n_running = 0
            if verbose:
                # print '%d jobs running' % n_running
                # print '%d jobs queued' % n_queued
                print 'Sleeping for %d seconds' % job_check_sleep
            time.sleep(job_check_sleep)

    #### TODO - return job output and error files as applicable (e.g. there may be multiple error files associated with one script)
    return output_files