def get_cp_pipeline_path(self):
    '''
    Locate exactly one CP pipeline settings file matching
    self.cp_pipeline_fnpattern, searching the process folder first and
    then the PIPES folder. Raises BrainyProcessError on ambiguity or if
    nothing is found.
    '''
    filename_regex_obj = re.compile(fntranslate(self.cp_pipeline_fnpattern))
    # Look both in PIPES folder and process folder of the pipe (named
    # after pipe).
    for location in (self.process_path, self.pipes_path):
        found = []
        for filename in os.listdir(location):
            if filename_regex_obj.match(filename):
                found.append(os.path.join(location, filename))
        # Check if found anything and complain otherwise.
        if len(found) > 1:
            raise BrainyProcessError(
                'More than one CP pipeline settings '
                'file found matching: %s in %s' % (
                    self.cp_pipeline_fnpattern, self.process_path))
        if len(found) == 1:
            # First found pipeline. Rest is ignored.
            return found[0]
    # Failed to find anything.
    raise BrainyProcessError(
        'No CP pipeline settings file'
        ' found matching: %s in %s' % (
            self.cp_pipeline_fnpattern, self.process_path))
def submit(self):
    '''
    Submit one MATLAB job per fused file, print an XML status report and
    set the 'submitted' flag. Raises BrainyProcessError if there are no
    fused files to submit.
    '''
    results = []
    for fused_filename in self.fused_files:
        matlab_code = self.get_matlab_code(fused_filename)
        # Derive a timestamped report filename from the fused .mat file.
        fused_report = fused_filename.replace(
            '.mat', '.results_%s' % get_timestamp_str())
        results.append(self.submit_matlab_job(
            matlab_code,
            report_file=fused_report,
        ))
    if len(results) == 0:
        raise BrainyProcessError(warning='Failed to find complete '
                                 'measurements.. check or restart previous'
                                 ' step')
    status_report = '''
 <status action="%(step_name)s">submitting (%(results_count)d) fusion jobs..
  <output>%(submission_result)s</output>
 </status>
''' % {
        'step_name': self.step_name,
        'results_count': len(results),
        'submission_result': escape_xml(str(results)),
    }
    print(status_report)
    self.set_flag('submitted')
def resubmit(self):
    '''
    Resubmit one MATLAB job per fused file, print an XML status report,
    set the 'resubmitted' flag and delegate to the parent class. Raises
    BrainyProcessError if there is nothing to resubmit.
    '''
    results = []
    for fused_filename in self.fused_files:
        matlab_code = self.get_matlab_code(fused_filename)
        # Report name is prefixed to distinguish resubmission reports.
        fused_report = 'DataFusion_' + fused_filename.replace(
            '.mat', '.results_%s' % get_timestamp_str())
        results.append(self.submit_matlab_job(
            matlab_code,
            report_file=fused_report,
            is_resubmitting=True,
        ))
    if len(results) == 0:
        raise BrainyProcessError(warning='Failed to resubmit data fusion')
    status_report = '''
 <status action="%(step_name)s">resubmitting (%(results_count)d) fusion jobs..
  <output>%(resubmission_result)s</output>
 </status>
''' % {
        'step_name': self.step_name,
        'results_count': len(results),
        'resubmission_result': escape_xml(str(results)),
    }
    print(status_report)
    self.set_flag('resubmitted')
    super(CPDataFusion, self).resubmit()
def submit(self):
    '''
    Submit one MATLAB job per batch file, print an XML status report and
    set the 'submitted' flag. If no batches exist, first surface any
    errors found in logs, then raise a BrainyProcessError warning.
    '''
    results = []
    for batch_filename in self.batch_files:
        matlab_code = self.get_matlab_code(batch_filename)
        # Derive a timestamped report filename from the batch .mat file.
        batch_report = batch_filename.replace(
            '.mat', '.results_%s' % get_timestamp_str())
        results.append(self.submit_matlab_job(
            matlab_code,
            report_file=batch_report,
        ))
    if len(results) == 0:
        # We want to explicitly check for errors and report them, if any
        # found.
        self.check_logs_for_errors()
        # If no errors were found, we still have to report a warning!
        raise BrainyProcessError(warning='Failed to find any batches.. '
                                 'check or restart previous step')
    status_report = '''
 <status action="%(step_name)s">submitting (%(batch_count)d) batches..
  <output>%(submission_result)s</output>
 </status>
''' % {
        'step_name': self.step_name,
        'batch_count': len(results),
        'submission_result': escape_xml(str(results)),
    }
    print(status_report)
    self.set_flag('submitted')
def get_cp_pipeline_path(self):
    '''
    Find exactly one CP pipeline settings file in the process folder
    matching self.cp_pipeline_fnpattern and return its full path. Raises
    BrainyProcessError on zero or multiple matches.
    '''
    pattern = re.compile(fntranslate(self.cp_pipeline_fnpattern))
    matches = [name for name in os.listdir(self.process_path)
               if pattern.match(name)]
    if not matches:
        raise BrainyProcessError(
            'No CP pipeline settings file'
            ' found matching: %s in %s' % (
                self.cp_pipeline_fnpattern, self.process_path))
    if len(matches) > 1:
        raise BrainyProcessError(
            'More than one CP pipeline settings file'
            ' found matching: %s in %s' % (
                self.cp_pipeline_fnpattern, self.process_path))
    return os.path.join(self.process_path, matches[0])
def resubmit(self): resubmission_results = list() output_batches = self.get_out_files() result_batches = self.get_result_files() for batch_filename in self.batch_files: # Resubmit only those files that have no data, i.e. failed with no # output. batch_prefix = self.parse_batch(batch_filename) if batch_prefix in output_batches \ and batch_prefix in result_batches: # This batch is complete. Do not resubmit it. print '<!-- CP batch %s is complete. Skipping.. -->' % \ batch_filename continue # Batch has not produced any output yet. We can resubmit it. matlab_code = self.get_matlab_code(batch_filename) batch_report = batch_filename.replace( '.mat', '.results_%s' % get_timestamp_str()) resubmission_result = self.submit_matlab_job( matlab_code, report_file=batch_report, is_resubmitting=True, ) resubmission_results.append(resubmission_result) if not resubmission_results: # We want to explicitly check for errors and report them, if any # found. self.check_logs_for_errors() # If no errors were found, we still have to report a warning! raise BrainyProcessError(warning='Failed to find any batches to ' 'resubmit. Check or restart previous ' 'step.') print( ''' <status action="%(step_name)s">resubmitting (%(batch_count)d) batches.. <output>%(resubmission_result)s</output> </status> ''' % { 'step_name': self.step_name, 'batch_count': len(resubmission_results), 'resubmission_result': escape_xml(str(resubmission_results)), }) self.set_flag('resubmitted') super(CPCluster, self).resubmit()
def has_data(self):
    '''
    Check if all files matching given patterns have been linked.

    For every (file_type, link_type) combination configured in
    self.file_patterns, compare the list of matching entry names in the
    source location against the target location; return False on the
    first mismatch, True otherwise.

    Raises BrainyProcessError if the target folder does not exist.

    Fix: removed a stray debug statement (`print self.target_location`)
    that leaked a bare path into stdout, which elsewhere carries
    structured XML status output.
    '''
    if not os.path.exists(self.target_location):
        raise BrainyProcessError(warning='Expected target folder is not '
                                 'found: %s' % self.target_location)

    def get_name(root, name):
        # Collect bare entry names only, so listings from different
        # parent folders are directly comparable.
        return name

    # Which link types can produce each file type: regular files may be
    # hard- or symlinked, directories can only be symlinked.
    linking_per_file_type = {
        'f': ['hardlink', 'symlink'],
        'd': ['symlink'],
    }
    for file_type in linking_per_file_type:
        linking = linking_per_file_type[file_type]
        for link_type in linking:
            if link_type in self.file_patterns:
                patterns = self.file_patterns[link_type]
                source_matches = list(
                    find_files(
                        path=self.source_location,
                        match=MatchAnyPatternsAndTypes(
                            filetypes=[file_type],
                            names=patterns,
                        ),
                        collect=get_name,
                        recursive=self.recursively,
                    ))
                target_matches = list(
                    find_files(
                        path=self.target_location,
                        match=MatchAnyPatternsAndTypes(
                            filetypes=[file_type],
                            names=patterns,
                        ),
                        collect=get_name,
                        recursive=self.recursively,
                    ))
                # NOTE(review): comparison is order-sensitive; assumes
                # find_files enumerates both trees in the same order —
                # confirm before changing to a set comparison.
                if not source_matches == target_matches:
                    return False
    return True
def build_linking_args(source_location, target_location,
                       nested_file_patterns, file_type, recursively):
    '''
    Yield one keyword-argument dict per link type ('hardlink', 'symlink')
    present in nested_file_patterns. Raises BrainyProcessError when the
    patterns for a present link type are not a non-empty list.
    '''
    for link_type in ('hardlink', 'symlink'):
        if link_type not in nested_file_patterns:
            continue
        patterns = nested_file_patterns[link_type]
        # Patterns must be a non-empty plain list.
        if type(patterns) != list or len(patterns) == 0:
            raise BrainyProcessError(
                warning='LinkFiles process requires a non empty list '
                        'of file patterns which can be match to '
                        'files in source_location.')
        yield {
            'source_location': source_location,
            'target_location': target_location,
            'file_patterns': patterns,
            'link_type': link_type,
            'file_type': file_type,
            'recursively': recursively,
        }
def link(source_path, target_path, patterns, link_type='hard', file_type='f', recursively=False): ''' Expect keys 'hardlink' and 'symlink' keys in description['file_patterns']. If pattern string starts and ends with '/' then it is a regexp, otherwise it is fnmatch. ''' assert os.path.exists(source_path) assert os.path.exists(target_path) file_matches = find_files( path=source_path, match=MatchAnyPatternsAndTypes( filetypes=[file_type], names=patterns, ), recursive=recursively, ) if link_type == 'hardlink' and file_type == 'f': make_link = os.link elif link_type == 'symlink': make_link = os.symlink else: raise IOError('Unsupported link type: %s' % link_type) for source_file in file_matches: link_path = os.path.join(target_path, os.path.basename(source_file)) try: print 'Linking "%s" -> "%s"' % (source_file, link_path) make_link(source_file, link_path) except IOError as error: if 'File exists' in str(error): message = 'It looks like linking was already done. Maybe '\ 'you are trying to re-run project incorrectly. Make '\ 'sure to clean previous results before retrying.' else: message = 'Unknown input-output error.' raise BrainyProcessError(warning=message, output=str(error))