def run_files_with_params(files, output_path, params, clusterer=None,
                          min_points=1, retain_ascii_output=True, cleanup_tmp=True):
    if clusterer is None:
        from autorun_mflash import cluster
        clusterer = cluster

    logger = logging.getLogger('FlashAutorunLogger')
    now = datetime.datetime.now().strftime('Flash autosort started %Y%m%d-%H%M%S')
    logger.info(now)

    # Calculate the number of header lines based on the largest data file.
    f_sizes = [os.path.getsize(f) for f in files]
    largest_file_index = f_sizes.index(max(f_sizes))
    lma_pipe, command, any_input = cat_LMA(files[largest_file_index])
    lma_text, err = lma_pipe.communicate(input=any_input)
    isDataLine = r"^.*\*+.*data.*\*+.*"  # Search for asterisks, "data", asterisks
    matchDataLine = re.compile(isDataLine, re.IGNORECASE)
    split_lma_text = lma_text.split('\n')
    for lineIdx, line in enumerate(split_lma_text):
        if matchDataLine.search(line):
            params['nhead'] = lineIdx + 1
            logger.info("Header is %d lines. This length will be used for all files this run." % (params['nhead'],))
            break

    # We could parse the number of sources from the header, but that is sometimes
    # wrong. Instead, set the number of points to the total number of lines in the
    # file minus the header, plus 10%: the largest compressed file might not be the
    # largest when uncompressed, due to variable packing efficiency.
    params['n_sources'] = int(1.10 * (len(split_lma_text) - params['nhead']))
    logger.info('Calculated max source count for this run: {0}'.format(params['n_sources']))
    del lma_text, split_lma_text
    # lma_pipe.close()

    logger.info('%s', params)

    h5_outfiles = []
    for a_file in files:
        try:
            file_base_name = os.path.split(a_file)[-1].replace('.gz', '')
            outfile = os.path.join(output_path, file_base_name + '.flash')
            # clusterer should use the name outfile as the base for any,
            # e.g., ASCII data it would like to save.
            lmadata, flashes = clusterer(a_file, output_path, outfile, params, logger,
                                         min_points=min_points,
                                         retain_ascii_output=retain_ascii_output,
                                         cleanup_tmp=cleanup_tmp)
            header = ''.join(lmadata.header)
            fl_metadata = FlashMetadata(header)
            outfile_with_extension = outfile + '.h5'
            h5_outfiles.append(outfile_with_extension)
            write_output(outfile_with_extension, flashes, a_file, metadata=fl_metadata)
        except Exception as e:
            logger.error("Did not successfully sort %s\nError was: %s" % (a_file, e))
            raise
    # loghandler.doRollover()
    return h5_outfiles
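# A minimal usage sketch for run_files_with_params, assuming the default
# clusterer from autorun_mflash. The glob pattern, output path, and the
# parameter keys below are hypothetical placeholders, not values defined by
# this module; a real run needs whatever params dict the clusterer expects.
# ('nhead' and 'n_sources' are filled in by run_files_with_params itself.)
def _example_run(output_path='/tmp/flash_sort'):
    import glob
    example_params = {'thresh_critical_time': 0.15,  # hypothetical key/value
                      'distance': 3000.0}            # hypothetical key/value
    files = sorted(glob.glob('/data/lma/LYLOUT_*.dat.gz'))  # placeholder path
    return run_files_with_params(files, output_path, example_params,
                                 min_points=10)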
def sort_file(filename, directory):
    """Sort one LMA data file into flashes. directory is the directory
    containing the flash program."""
    logger = logging.getLogger('FlashAutorunLogger')

    f, command, the_input = cat_LMA(filename)
    run_cmd = [os.path.join(directory, flash_prg_name)]
    logger.info('Running %s' % (run_cmd,))  # , 'with stdin from ', command

    # Comment out stdout=subprocess.PIPE to print stdout to the terminal.
    # When present, stdout is captured by Python, which leads to less noise
    # in the terminal.
    p = subprocess.Popen(run_cmd, stdin=f.stdout, stdout=subprocess.PIPE)  # , preexec_fn=f.stdin.close

    # The communicate step is key to not blocking at completion.
    # stderr is not connected to a pipe, so err will be None.
    out, err = p.communicate()  # input=the_input
    return out, err
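# A standalone sketch of the pipe pattern sort_file relies on, using only the
# standard library: one process's stdout feeds the next process's stdin, and
# communicate() on the downstream process drains the pipe so neither process
# blocks at completion. The commands here ('cat' into 'wc -l') are
# illustrative, not the flash program itself.
def _example_pipe_pattern(path):
    import subprocess
    upstream = subprocess.Popen(['cat', path], stdout=subprocess.PIPE)
    downstream = subprocess.Popen(['wc', '-l'], stdin=upstream.stdout,
                                  stdout=subprocess.PIPE)
    upstream.stdout.close()  # let upstream get SIGPIPE if downstream exits early
    out, err = downstream.communicate()
    return out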