def main(expdir, recipe, computing):
    '''main function'''

    overwrite = False
    if os.path.isdir(expdir):
        text = ''
        while text not in ('o', 'r'):
            text = raw_input('%s already exists, do you want to '
                             'resume experiment (r) or overwrite (o) '
                             '(respond with o or r)' % expdir)
        if text == 'o':
            overwrite = True
    else:
        #create the experiments directory
        os.makedirs(expdir)

    #copy the config files
    if overwrite:
        shutil.copyfile(os.path.join(recipe, 'acquisition.cfg'),
                        os.path.join(expdir, 'acquisition.cfg'))
    else:
        tools.safecopy(os.path.join(recipe, 'acquisition.cfg'),
                       os.path.join(expdir, 'acquisition.cfg'))

    shutil.copyfile(os.path.join(recipe, 'coder.cfg'),
                    os.path.join(expdir, 'coder.cfg'))
    shutil.copyfile(os.path.join(recipe, 'structure.xml'),
                    os.path.join(expdir, 'structure.xml'))
    shutil.copyfile(os.path.join(recipe, 'database.cfg'),
                    os.path.join(expdir, 'database.cfg'))
    shutil.copyfile(os.path.join(recipe, 'cross_validation_ppall.cfg'),
                    os.path.join(expdir, 'cross_validation_ppall.cfg'))

    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(recipe, 'acquisition.cfg'))
    modelname = acquisitionconf.get('acquisition', 'name')
    shutil.copyfile(
        os.path.join(os.getcwd(), 'assist', 'acquisition', 'defaults',
                     modelname + '.cfg'),
        os.path.join(expdir, modelname + '.cfg'))

    #read the cross_validation config file
    expconf = ConfigParser()
    expconf.read(os.path.join(recipe, 'cross_validation.cfg'))

    #default conf file
    default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'defaults', 'cross_validation.cfg')

    #apply the defaults
    if os.path.exists(default):
        tools.default_conf(expconf, default)
    expconf = dict(expconf.items('cross_validation'))

    #read the data config file
    if not os.path.exists(os.path.join(recipe, 'database.cfg')):
        raise Exception('cannot find database.cfg in %s' % recipe)
    dataconf = ConfigParser()
    dataconf.read(os.path.join(recipe, 'database.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    #filter out all speakers with less than 100 examples
    #(~20% of the FluentSpeechCommands dataset)
    bad_spks = []
    if os.path.exists(os.path.join(recipe, 'FS_linecounts.txt')):
        for l in open(os.path.join(recipe, 'FS_linecounts.txt')):
            splitline = l.strip().split(' ')
            if int(splitline[1]) < 100:
                bad_spks.append(splitline[0])
    print bad_spks

    for speaker in dataconf.sections():
        if speaker in bad_spks:
            continue
        print 'speaker: %s' % (speaker)

        #create the speaker directory
        if os.path.isdir(os.path.join(expdir, speaker)):
            if overwrite:
                shutil.rmtree(os.path.join(expdir, speaker))
                os.makedirs(os.path.join(expdir, speaker))
        else:
            os.makedirs(os.path.join(expdir, speaker))

        #create a task structure file
        structure = Structure(os.path.join(expdir, 'structure.xml'))

        #create a coder
        coder = coder_factory.factory(coderconf.get('coder', 'name'))(
            structure, coderconf)

        #read and code all the tasks
        labelvecs = []
        names = []
        taskstrings = dict()
        for line in open(dataconf.get(speaker, 'tasks')):
            splitline = line.strip().split(' ')
            name = speaker + '_' + splitline[0]
            names.append(name)
            taskstring = ' '.join(splitline[1:])
            taskstrings[name] = taskstring
            task = read_task(taskstring)
            labelvecs.append(coder.encode(task))

        #divide the data into blocks
        blocksfile = os.path.join(expdir, speaker, 'blocks.pkl')
        if os.path.exists(blocksfile):
            with open(blocksfile, 'rb') as fid:
                blocks = pickle.load(fid)
        else:
            blocks = make_blocks(np.array(labelvecs), expconf,
                                 dataconf.get(speaker, 'features'))
            with open(blocksfile, 'wb') as fid:
                pickle.dump(blocks, fid)

        #create train-test sets for all experiments

        #seed the random number generator
        random.seed(3105)
        trainids = [None] * (len(blocks) - 1)
        testids = [None] * (len(blocks) - 1)
        for b in range(len(blocks) - 1):
            trainids[b] = [None] * int(expconf['numexp'])
            testids[b] = [None] * int(expconf['numexp'])
            for e in range(int(expconf['numexp'])):
                trainids[b][e] = list(
                    itertools.chain.from_iterable(random.sample(blocks, b + 1)))
                testids[b][e] = [
                    x for x in range(len(names)) if x not in trainids[b][e]]

        #read the feature files
        features = dict()
        for l in open(os.path.join(dataconf.get(speaker, 'features'), 'feats')):
            splitline = l.strip().split(' ')
            featname = speaker + '_' + splitline[0]
            features[featname] = ' '.join(splitline[1:])

        #create an expdir for each experiment
        b = int(expconf['startblocks']) - 1
        while True:
            for e in range(int(expconf['numexp'])):
                print '    train blocks: %d, experiment %s' % (b + 1, e)

                #create the directory
                subexpdir = os.path.join(expdir, speaker,
                                         '%dblocks_exp%d' % (b + 1, e))

                if os.path.exists(os.path.join(subexpdir, 'f1')):
                    continue

                if not os.path.isdir(subexpdir):
                    os.makedirs(subexpdir)

                #create pointers to the config files
                tools.symlink(os.path.join(expdir, 'acquisition.cfg'),
                              os.path.join(subexpdir, 'acquisition.cfg'))
                tools.symlink(os.path.join(expdir, 'coder.cfg'),
                              os.path.join(subexpdir, 'coder.cfg'))
                tools.symlink(os.path.join(expdir, 'structure.xml'),
                              os.path.join(subexpdir, 'structure.xml'))
                tools.symlink(os.path.join(expdir, 'database.cfg'),
                              os.path.join(subexpdir, 'database.cfg'))

                if not os.path.exists(os.path.join(subexpdir, 'trainfeats')):
                    trainutts = [names[i] for i in trainids[b][e]]
                    print 'number of examples: %d' % len(trainutts)
                    testutts = [names[i] for i in testids[b][e]]

                    #create the train and test sets
                    tools.writefile(os.path.join(subexpdir, 'trainfeats'),
                                    {utt: features[utt] for utt in trainutts})
                    tools.writefile(os.path.join(subexpdir, 'traintasks'),
                                    {utt: taskstrings[utt] for utt in trainutts})
                    tools.writefile(os.path.join(subexpdir, 'testfeats'),
                                    {utt: features[utt] for utt in testutts})
                    tools.writefile(os.path.join(subexpdir, 'testtasks'),
                                    {utt: taskstrings[utt] for utt in testutts})

                if computing in ('condor', 'condor_gpu'):
                    #create the outputs directory
                    if not os.path.isdir(os.path.join(subexpdir, 'outputs')):
                        os.makedirs(os.path.join(subexpdir, 'outputs'))

                    if computing == 'condor_gpu':
                        jobfile = 'run_script_GPU.job'
                    else:
                        jobfile = 'run_script.job'

                    #only submit the job if it is not in the queue yet
                    in_queue = os.popen(
                        'if condor_q -nobatch -wide | grep -q %s; '
                        'then echo true; else echo false; fi' %
                        subexpdir).read().strip() == 'true'

                    #submit the condor job
                    if not in_queue:
                        os.system('condor_submit expdir=%s script=train_test'
                                  ' assist/condor/%s' % (subexpdir, jobfile))
                else:
                    train_test.main(subexpdir)

            newb = (b + 1) * int(expconf['scale']) + int(expconf['increment']) - 1
            newb = min(newb, len(blocks) - 2)
            if b == newb:
                break
            else:
                b = newb
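For reference, the train/test construction above can be seen in isolation in the following standalone sketch (illustrative only; the toy blocks list is made up): for a given size, the train set is the union of b + 1 randomly sampled blocks of utterance indices and the test set is the complement.

import itertools
import random

#toy example: four hypothetical blocks over eight utterance indices
blocks = [[0, 1], [2, 3], [4, 5, 6], [7]]
num_utts = 8

random.seed(3105)
#train on two randomly chosen blocks, test on all remaining indices
trainids = list(itertools.chain.from_iterable(random.sample(blocks, 2)))
testids = [x for x in range(num_utts) if x not in trainids]
print trainids, testids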
def execute(node, previous, experiment_folder):
    """
    Execute a task defined by the given node in the experiment graph.

    Parameters
    ----------
    node : Element
        The node to be executed.
    previous : dict (or list of dict)
        Dictionary of the experiment's running-time variables after the end
        of the parent node's execution. May be a list of dictionaries in the
        special case of a fusion node, which has more than one parent.
    experiment_folder : string
        String with the path to the experiment folder, where the files of
        the experiment will be saved.

    Returns
    -------
    exp_param : dict
        The updated dictionary of the experiment's running-time variables
        after the node's execution.
    """

    global execution_time
    global tex_path
    global tex_dict
    global openset_experiment

    exp_param = previous
    parameters = ast.literal_eval(node.get("parameters"))
    node_id = node.attrib['id']

    #Get node name
    node_name = node.get('name')

    if node.tag == "collection":
        print "Collection", exp_param.keys()

        images, classes, extract_path, read_time = \
            read_collection.main(node_name, openset_experiment, parameters,
                                 node_id)
        execution_time += read_time

        exp_param['images'] = images
        exp_param['classes'] = classes
        exp_param['extract_path'] = extract_path

    elif node.tag == "train_test_method":
        print "train_test_method", exp_param.keys()

        images = exp_param['images']
        classes = exp_param['classes']

        images, classes, train_test_list, train_test_time = \
            train_test.main(images, classes, experiment_folder, node_name,
                            parameters, openset_experiment, node_id)
        execution_time += train_test_time

        exp_param['images'] = images
        exp_param['classes'] = classes
        exp_param['train_test_list'] = train_test_list
        exp_param['train_test_method'] = node_name
        exp_param['train_test_parameters'] = parameters

    elif node.tag == "descriptor":
        print "descriptor", exp_param.keys()

        images = exp_param['images']
        extract_path = exp_param['extract_path']
        classes_keys = exp_param['classes'].keys()

        if node_name == "bag":
            train_test_list = exp_param['train_test_list']
            images, extract_time = extract_bag.main(
                images, train_test_list, extract_path, experiment_folder,
                parameters, node_id)
        elif node_name == "bovg":
            train_test_list = exp_param['train_test_list']
            images, extract_time = extract_bovg.main(
                images, train_test_list, extract_path, experiment_folder,
                parameters, node_id)
        else:
            images, extract_time = extract_features.main(
                images, classes_keys, extract_path, node_name, parameters,
                node_id)
        execution_time += extract_time

        exp_param['images'] = images
        exp_param['descriptor'] = node_name

    elif node.tag == "normalizer":
        try:
            manager = Manager()
            images = manager.dict(exp_param['images'])
            train_test_list = exp_param['train_test_list']
        except:
            print "\n\tMissing Input. Exiting."
            sys.exit(1)

        norm_fv_paths, normalize_time = normalize_features.main(
            images, train_test_list, experiment_folder, node_name, parameters,
            node_id)
        execution_time += normalize_time

        del exp_param['images']
        exp_param['fv_paths'] = norm_fv_paths

    elif node.tag == "classifier":
        try:
            classes = exp_param['classes']
            train_test_list = exp_param['train_test_list']
            descriptor = exp_param['descriptor']
            try:
                fv_paths = exp_param['fv_paths']
                del exp_param['fv_paths']
            except:
                images = exp_param['images']
                fv_paths = util.save_file_extract(images, train_test_list,
                                                  experiment_folder)
        except:
            print "\n\tMissing Input. Exiting."
            sys.exit(1)

        images, classes_list, classify_time = classify.main(
            fv_paths, classes.keys(), train_test_list, experiment_folder,
            node_name, parameters, descriptor, node_id)
        execution_time += classify_time

        exp_param['images'] = images
        exp_param['classes_list'] = classes_list

    elif node.tag == "fusion_method":
        len_exp_param = len(exp_param)
        #lists with the images dictionaries, classes dictionaries, and train
        #and test set lists
        list_images = []
        list_classes = []
        list_train_test = []

        extract_path = exp_param[INDEX_ZERO]['extract_path']
        for index in range(len_exp_param):
            try:
                list_images.append(exp_param[index]['images'])
            except:
                images = {}
                for fv_path in exp_param[index]['fv_paths']:
                    print "fv_path:", fv_path
                    images_new = util.read_fv_file(fv_path)
                    images = util.merge_dict(images, images_new)
                list_images.append(images)

            list_classes.append(exp_param[index]['classes'])

            #In case that it performs the fusion of collections, there is no
            #train_test_list
            try:
                list_train_test.append(exp_param[index]['train_test_list'])
            except:
                list_train_test.append(None)

        #classes_list is present only after the classification module
        try:
            classes_list = exp_param[INDEX_ZERO]['classes_list']
        except:
            classes_list = None

        try:
            train_test_method = exp_param[INDEX_ZERO]['train_test_method']
            train_test_parameters = exp_param[INDEX_ZERO][
                'train_test_parameters']
        except:
            train_test_method = None
            train_test_parameters = None

        images, classes, train_test_list, fusion_time = \
            fusion.main(list_images, list_classes, list_train_test,
                        classes_list, experiment_folder, node_name,
                        parameters, node_id)
        execution_time += fusion_time

        exp_param = {}
        exp_param['images'] = images
        exp_param['classes'] = classes
        if train_test_list is not None:
            exp_param['train_test_list'] = train_test_list
        if classes_list is not None:
            exp_param['classes_list'] = classes_list
        if train_test_method is not None:
            exp_param['train_test_method'] = train_test_method
            exp_param['train_test_parameters'] = train_test_parameters
        exp_param['descriptor'] = None
        exp_param['extract_path'] = extract_path

    elif node.tag == "evaluation_measure":
        try:
            images = exp_param['images']
            train_test_list = exp_param['train_test_list']
            classes_list = exp_param['classes_list']
        except:
            print "\n\tMissing Input. Exiting."
            sys.exit(1)

        evaluation_time, evaluation_path = evaluation.main(
            images, train_test_list, classes_list, experiment_folder,
            node_name, parameters, node_id)
        execution_time += evaluation_time

        #Dictionaries to create the tex file
        train_test_method = exp_param['train_test_method']
        train_test_parameters = str(exp_param['train_test_parameters'])

        if train_test_method not in tex_dict:
            tex_dict[train_test_method] = {}
        train_test_dict = tex_dict[train_test_method]

        if train_test_parameters not in train_test_dict:
            train_test_dict[train_test_parameters] = {}
        output_dict = train_test_dict[train_test_parameters]

        if node_name not in output_dict:
            output_dict[node_name] = []
        list_output = [evaluation_path, classes_list[0], node_id]
        if list_output not in output_dict[node_name]:
            output_dict[node_name].append(list_output)

        train_test_dict[train_test_parameters] = output_dict
        tex_dict[train_test_method] = train_test_dict

    elif node.tag == "preprocessing":
        images = exp_param['images']
        classes = exp_param['classes']

        images, classes, preprocessing_time = preprocessing.main(
            images, classes, experiment_folder, node_name, parameters, node_id)
        execution_time += preprocessing_time

        exp_param['images'] = images
        exp_param['classes'] = classes

    else:
        print "Error. Unknown Tag."
        sys.exit(1)

    return exp_param
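As a usage illustration only (the framework's actual runner is not shown in this excerpt), execute can be driven by walking the XML experiment graph and passing each node's returned exp_param on to its children; the file name and root handling below are assumptions.

import xml.etree.ElementTree as ET

def run_branch(node, previous, experiment_folder):
    #execute this node, then recurse into its child nodes with the result
    exp_param = execute(node, previous, experiment_folder)
    for child in node:
        run_branch(child, exp_param, experiment_folder)
    return exp_param

#tree = ET.parse('experiment.xml')              #hypothetical graph file
#run_branch(tree.getroot(), {}, 'experiments/run1')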
        Augment each image to create more data
        '''
        i = 0
        for batch in self.datagen.flow_from_directory(
                directory=self.import_path,
                save_to_dir=self.export_path,
                save_prefix='keras_',
                save_format='jpg',
                batch_size=1,
                color_mode='rgb'):
            i += 1
            if i == self.num_images:
                break


if __name__ == "__main__":
    main()
    os.chdir(home)
    shape = (299, 299, 3)
    import_path = 'data/Keras_Images'
    export_path = 'data/Keras_Images'

    #Save resized training and test photos into each folder
    for i in artist:
        #Test Images
        test_path = 'data/Test/{0}/'.format(i)
        test_resize = ImagePipeline(test_dict[i], shape, home, import_path,
                                    export_path)
        test_resize.image_folder(test_path)
        test_resize.save_folder('data/Test', i)
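The augmentation loop above relies on self.datagen, which is not defined in this fragment. A minimal sketch of what it could be, assuming a standard Keras ImageDataGenerator (these particular augmentation settings are illustrative, not taken from the original pipeline):

from keras.preprocessing.image import ImageDataGenerator

#a generator along these lines could be assigned to self.datagen
datagen = ImageDataGenerator(
    rotation_range=20,       #random rotations of up to 20 degrees
    width_shift_range=0.1,   #random horizontal shifts
    height_shift_range=0.1,  #random vertical shifts
    zoom_range=0.1,          #random zooms
    horizontal_flip=True,    #random left-right flips
    fill_mode='nearest')     #fill pixels exposed by the transforms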
def main(expdir, recipe, computing):
    '''main function'''

    overwrite = False
    if os.path.isdir(expdir):
        text = ''
        while text not in ('o', 'r'):
            text = raw_input('%s already exists, do you want to '
                             'resume experiment (r) or overwrite (o) '
                             '(respond with o or r)' % expdir)
        if text == 'o':
            overwrite = True
    else:
        #create the experiments directory
        os.makedirs(expdir)

    #copy the config files
    if overwrite:
        shutil.copyfile(os.path.join(recipe, 'acquisition.cfg'),
                        os.path.join(expdir, 'acquisition.cfg'))
    else:
        tools.safecopy(os.path.join(recipe, 'acquisition.cfg'),
                       os.path.join(expdir, 'acquisition.cfg'))

    shutil.copyfile(os.path.join(recipe, 'coder.cfg'),
                    os.path.join(expdir, 'coder.cfg'))
    shutil.copyfile(os.path.join(recipe, 'structure.xml'),
                    os.path.join(expdir, 'structure.xml'))
    shutil.copyfile(os.path.join(recipe, 'database.cfg'),
                    os.path.join(expdir, 'database.cfg'))
    shutil.copyfile(os.path.join(recipe, 'cross_validation_ppall.cfg'),
                    os.path.join(expdir, 'cross_validation_ppall.cfg'))

    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(recipe, 'acquisition.cfg'))
    modelname = acquisitionconf.get('acquisition', 'name')
    shutil.copyfile(
        os.path.join(os.getcwd(), 'assist', 'acquisition', 'defaults',
                     modelname + '.cfg'),
        os.path.join(expdir, modelname + '.cfg'))

    #read the cross_validation config file
    expconf = ConfigParser()
    expconf.read(os.path.join(recipe, 'cross_validation_ppall.cfg'))

    #default conf file
    default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'defaults', 'cross_validation_ppall.cfg')

    #apply the defaults
    if os.path.exists(default):
        tools.default_conf(expconf, default)
    expconf = dict(expconf.items('cross_validation_ppall'))

    #read the data config file
    if not os.path.exists(os.path.join(recipe, 'database.cfg')):
        raise Exception('cannot find database.cfg in %s' % recipe)
    dataconf = ConfigParser()
    dataconf.read(os.path.join(recipe, 'database.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    #for word specific thresholds (not used anymore)
    #if os.path.isfile(os.path.join(recipe, 'word_thresholds.pkl')):
    #    print('File with word thresholds found in recipe')
    #    shutil.copyfile(os.path.join(recipe, 'word_thresholds.pkl'),
    #                    os.path.join(expdir, 'word_thresholds.pkl'))
    #    thresholdsarepresent = True
    #else:
    #    print('No file found with word thresholds, using a fixed one')
    #    thresholdsarepresent = False

    labelvecs = []
    names = []
    taskstrings = dict()
    features = dict()

    print 'Searching for all speakers...'
    for speaker in dataconf.sections():
        print '    speaker: %s' % (speaker)

        #create a task structure file
        structure = Structure(os.path.join(expdir, 'structure.xml'))

        #create a coder
        #see typesplit_coder.py (line 51) for all labels and their
        #corresponding output capsule numbers
        coder = coder_factory.factory(coderconf.get('coder', 'name'))(
            structure, coderconf)

        #read and code all the tasks; a task line looks like
        #'recording1_Voice_10 <move_rel direction="forward" distance="little" throttle="fast"/>'
        for line in open(dataconf.get(speaker, 'tasks')):
            splitline = line.strip().split(' ')
            name = speaker + '_' + splitline[0]  #e.g. 'recording1_Voice_10'
            names.append(name)
            taskstring = ' '.join(splitline[1:])
            taskstrings[name] = taskstring
            task = read_task(taskstring)
            labelvecs.append(coder.encode(task))

        #read the feature files; a feats line looks like
        #'recording1_Voice_10 /esat/spchtemp/scratch/r0580562/databases/grabo_features/pp2/recording1_Voice_10.npy'
        for l in open(os.path.join(dataconf.get(speaker, 'features'), 'feats')):
            splitline = l.strip().split(' ')
            featname = speaker + '_' + splitline[0]
            features[featname] = ' '.join(splitline[1:])

    print 'Divide data into blocks...'
    #divide the data into blocks; look for an existing blocks file in the
    #recipe, because making the blocks takes a very long time
    blocksfile = os.path.join(recipe, 'blocks.pkl')
    if os.path.exists(blocksfile):
        print 'Loading found blocks file (check if the number of blocks is still the same)'
        with open(blocksfile, 'rb') as fid:
            blocks = pickle.load(fid)
    else:
        print 'No blocks file found in recipe, making a new one'
        #blocks is a list of lists of utterance indices
        blocks = make_blocks(np.array(labelvecs), expconf, expdir)
        with open(blocksfile, 'wb') as fid:
            pickle.dump(blocks, fid)

    print 'Shuffle speakers...'
    #look for existing train and test sets ('saved_ids' in the recipe) and
    #load them, because making them takes a very long time
    sets_properties = {}
    if os.path.isdir(os.path.join(recipe, 'saved_ids')):
        saved_ids = ConfigParser()
        saved_ids.read(
            os.path.join(recipe, 'saved_ids', 'cross_validation_ppall.cfg'))
        sets_properties = dict(saved_ids.items('cross_validation_ppall'))
    else:
        sets_properties['numblocks'] = 0
        sets_properties['numexp'] = 0

    if (sets_properties['numblocks'] == expconf['numblocks']) and \
            (sets_properties['numexp'] == expconf['numexp']):
        print '    Loading found test recipe'
        trainids_saved = os.path.join(recipe, 'saved_ids', 'trainids.pkl')
        with open(trainids_saved, 'rb') as fid:
            trainids = pickle.load(fid)
        testids_saved = os.path.join(recipe, 'saved_ids', 'testids.pkl')
        with open(testids_saved, 'rb') as fid:
            testids = pickle.load(fid)
    else:
        print ('    No saved train and test sets found with the same '
               'cross-validation configuration in the recipe')

        #seed the random number generator
        random.seed(3105)

        trainids = [None] * (len(blocks) - 1)  #len(blocks) = 15
        testids = [None] * (len(blocks) - 1)
        print '    Number of blocks: %d' % (len(blocks))

        b = 0
        while b < (len(blocks) - 1):
            print '    block %d' % b
            trainids[b] = [None] * int(expconf['numexp'])
            testids[b] = [None] * int(expconf['numexp'])
            for e in range(int(expconf['numexp'])):
                trainids[b][e] = list(
                    itertools.chain.from_iterable(random.sample(blocks, b + 1)))
                testids[b][e] = [
                    x for x in range(len(names)) if x not in trainids[b][e]]

            #scale factor to use more of the smaller blocks and fewer of the
            #bigger blocks (the learning curve saturates)
            newb = int(np.floor((b + 1) * float(expconf['scale']) +
                                int(expconf['increment']) - 1))
            newb = min(newb, len(blocks) - 2)
            if b == newb:
                break
            else:
                b = newb

        #save the newly made train and test sets in the expdir
        os.makedirs(os.path.join(expdir, 'saved_ids'))
        trainids_saved = os.path.join(expdir, 'saved_ids', 'trainids.pkl')
        testids_saved = os.path.join(expdir, 'saved_ids', 'testids.pkl')
        with open(trainids_saved, 'wb') as fid:
            pickle.dump(trainids, fid)
        with open(testids_saved, 'wb') as fid:
            pickle.dump(testids, fid)
        shutil.copyfile(
            os.path.join(recipe, 'cross_validation_ppall.cfg'),
            os.path.join(expdir, 'saved_ids', 'cross_validation_ppall.cfg'))

    #create an expdir for each experiment
    b = int(expconf['startblocks']) - 1  #0

    print 'Launch the experiments...'
    while True:
        for e in range(int(expconf['numexp'])):
            print '    train blocks: %d, experiment %s' % (b + 1, e)

            #create the directory
            subexpdir = os.path.join(expdir, '%dblocks_exp%d' % (b + 1, e))

            if os.path.exists(os.path.join(subexpdir, 'f1')):
                continue

            if not os.path.isdir(subexpdir):
                os.makedirs(subexpdir)

            #create pointers to the config files
            tools.symlink(os.path.join(expdir, 'acquisition.cfg'),
                          os.path.join(subexpdir, 'acquisition.cfg'))
            tools.symlink(os.path.join(expdir, 'coder.cfg'),
                          os.path.join(subexpdir, 'coder.cfg'))
            tools.symlink(os.path.join(expdir, 'structure.xml'),
                          os.path.join(subexpdir, 'structure.xml'))
            tools.symlink(os.path.join(expdir, 'database.cfg'),
                          os.path.join(subexpdir, 'database.cfg'))
            #if thresholdsarepresent:
            #    tools.symlink(os.path.join(expdir, 'word_thresholds.pkl'),
            #                  os.path.join(subexpdir, 'word_thresholds.pkl'))

            if not os.path.exists(os.path.join(subexpdir, 'trainfeats')):
                trainutts = [names[i] for i in trainids[b][e]]
                print 'number of examples: %d' % len(trainutts)
                testutts = [names[i] for i in testids[b][e]]

                #create the train and test sets
                tools.writefile(os.path.join(subexpdir, 'trainfeats'),
                                {utt: features[utt] for utt in trainutts})
                tools.writefile(os.path.join(subexpdir, 'traintasks'),
                                {utt: taskstrings[utt] for utt in trainutts})
                tools.writefile(os.path.join(subexpdir, 'testfeats'),
                                {utt: features[utt] for utt in testutts})
                tools.writefile(os.path.join(subexpdir, 'testtasks'),
                                {utt: taskstrings[utt] for utt in testutts})

            if computing in ('condor', 'condor_gpu'):
                #create the outputs directory
                if not os.path.isdir(os.path.join(subexpdir, 'outputs')):
                    os.makedirs(os.path.join(subexpdir, 'outputs'))

                if computing == 'condor_gpu':
                    jobfile = 'run_script_GPU.job'
                else:
                    jobfile = 'run_script.job'

                #only submit the job if it is not in the queue yet
                in_queue = os.popen(
                    'if condor_q -nobatch -wide | grep -q %s; '
                    'then echo true; else echo false; fi' %
                    subexpdir).read().strip() == 'true'

                #submit the condor job
                if not in_queue:
                    os.system('condor_submit expdir=%s script=train_test'
                              ' assist/condor/%s' % (subexpdir, jobfile))
            else:
                train_test.main(subexpdir)

        newb = int(np.floor((b + 1) * float(expconf['scale']) +
                            int(expconf['increment']) - 1))
        newb = min(newb, len(blocks) - 2)
        if b == newb:
            break
        else:
            b = newb