def collect_output(datafile, min_points=1, mask_length=4):
    """Read flash-sorted LMA data and build one Flash object per flash id.

    ``datafile`` is opened with ``LMAdataFile``. Its points are ordered by
    ``flash_id``, split wherever the id changes, and every group is wrapped
    in a ``Flash``. Each flash then receives a ``FlashMetadata`` built from
    the file header and is passed to ``calculate_flash_stats``.

    Parameters
    ----------
    datafile
        Passed straight to ``LMAdataFile``.
    min_points
        Not used in this function; kept so existing callers keep working.
    mask_length
        Forwarded to ``LMAdataFile`` as ``mask_length``.

    Returns
    -------
    tuple
        ``(lma, flashes)``: the ``LMAdataFile`` instance and the list of
        ``Flash`` objects (empty when the file holds no points).
    """
    import numpy as np

    logger = logging.getLogger('FlashAutorunLogger')

    lma = LMAdataFile(datafile, mask_length=mask_length)

    # A file with no data yields a 0-d array (shape == ()). A 1-d array with
    # zero rows must be treated the same way, otherwise the slicing below
    # would produce a single Flash containing no points.
    data_shape = lma.data.shape
    if len(data_shape) == 0 or data_shape[0] == 0:
        flashes = []
    else:
        # Sort the points so that each flash occupies a contiguous run.
        order = np.argsort(lma.flash_id)
        flid = lma.flash_id[order]
        all_data = lma.data[order]

        # A nonzero difference between neighbouring ids marks the first
        # point of a new flash; index 0 always starts the first flash.
        changes, = np.where(flid[1:] - flid[:-1])
        starts = np.hstack(([0], changes + 1))
        stops = np.hstack((starts[1:], [len(flid)]))

        flashes = [Flash(all_data[lo:hi]) for lo, hi in zip(starts, stops)]

    # calculate extra flash metadata, e.g., initiation, centroid
    logger.info(
        "Calculating flash initation, centroid, area, etc. for %d flashes",
        len(flashes))

    if flashes:
        # The header text is identical for every flash, so build it once.
        header = ''.join(lma.header)
        for fl in flashes:
            fl.metadata = FlashMetadata(header)
            calculate_flash_stats(fl)

    return lma, flashes
def collect_output(datafile, min_points=1, mask_length=4):
    """Read flash-sorted LMA data and build one Flash object per flash id.

    ``datafile`` is opened with ``LMAdataFile``. Its points are ordered by
    ``flash_id``, split wherever the id changes, and every group is wrapped
    in a ``Flash``. Each flash then receives a ``FlashMetadata`` built from
    the file header and is passed to ``calculate_flash_stats``.

    Parameters
    ----------
    datafile
        Passed straight to ``LMAdataFile``.
    min_points
        Not used in this function; kept so existing callers keep working.
    mask_length
        Forwarded to ``LMAdataFile`` as ``mask_length``.

    Returns
    -------
    tuple
        ``(lma, flashes)``: the ``LMAdataFile`` instance and the list of
        ``Flash`` objects (empty when the file holds no points).
    """
    import numpy as np

    logger = logging.getLogger('FlashAutorunLogger')

    lma = LMAdataFile(datafile, mask_length=mask_length)

    # A file with no data yields a 0-d array (shape == ()). A 1-d array with
    # zero rows must be treated the same way, otherwise the slicing below
    # would produce a single Flash containing no points.
    data_shape = lma.data.shape
    if len(data_shape) == 0 or data_shape[0] == 0:
        flashes = []
    else:
        # Sort the points so that each flash occupies a contiguous run.
        order = np.argsort(lma.flash_id)
        flid = lma.flash_id[order]
        all_data = lma.data[order]

        # A nonzero difference between neighbouring ids marks the first
        # point of a new flash; index 0 always starts the first flash.
        changes, = np.where(flid[1:] - flid[:-1])
        starts = np.hstack(([0], changes + 1))
        stops = np.hstack((starts[1:], [len(flid)]))

        flashes = [Flash(all_data[lo:hi]) for lo, hi in zip(starts, stops)]

    # calculate extra flash metadata, e.g., initiation, centroid
    logger.info(
        "Calculating flash initation, centroid, area, etc. for %d flashes",
        len(flashes))

    if flashes:
        # The header text is identical for every flash, so build it once.
        header = ''.join(lma.header)
        for fl in flashes:
            fl.metadata = FlashMetadata(header)
            calculate_flash_stats(fl)

    return lma, flashes
def create_flash_objs(lma, good_data):
    """Coroutine that turns cluster labels into Flash objects.

    ``lma`` is an LMAdataFile object. Each time labels are sent in, its
    ``data`` attribute is replaced by ``good_data`` with a ``flash_id``
    column appended (the previous ``lma.data`` is kept as ``lma.raw_data``).
    Very similar to ``collect_output`` in autorun_mflash.

    Usage: prime the generator with ``next()``, then ``send`` a tuple
    ``(unique_labels, point_labels, all_IDs)``:

    * ``point_labels`` - flash label per point; ``-1`` marks a singleton.
    * ``all_IDs`` - index into ``good_data`` to which each entry of
      ``point_labels`` corresponds.
    * ``unique_labels`` - only used to sanity-check the largest label.

    Calling ``close()`` stores the most recently built list of flashes on
    ``lma.flash_objects`` (an empty list if nothing was ever sent).
    """
    logger = logging.getLogger('FlashAutorunLogger')

    # Bound up front so that closing the coroutine before any labels have
    # been sent does not fail on an undefined name in the handler below.
    flashes = []

    try:
        while True:
            (unique_labels, point_labels, all_IDs) = (yield)

            # add flash_id column
            empty_labels = np.empty_like(point_labels)
            data = append_fields(good_data, ('flash_id',), (empty_labels,))

            # all_IDs gives the index in the original data array to
            # which each point_label corresponds
            data['flash_id'][all_IDs] = point_labels

            # In the case of no data in the file, data.shape will have
            # length zero, i.e., a 0-d array
            if len(data.shape) == 0:
                # No data
                flashes = []
            else:
                # work first with non-singleton flashes
                # to have strictly positive flash ids
                print(data.shape)
                singles = (data['flash_id'] == -1)
                non_singleton = data[np.logical_not(singles)]
                print(non_singleton['flash_id'].shape)

                order = np.argsort(non_singleton['flash_id'])
                ordered_data = non_singleton[order]
                flid = ordered_data['flash_id']

                have_clustered = flid.shape[0] > 0
                max_flash_id = flid[-1] if have_clustered else 0

                # Diagnostic only: report a mismatch but carry on. A plain
                # comparison is used so the check survives ``python -O``.
                expected_max = max(unique_labels)
                if max_flash_id != expected_max:
                    print("Max flash ID {0} is not as expected from unique labels {1}".format(max_flash_id, expected_max))

                # A nonzero difference between neighbouring ids marks the
                # first point of a new flash.
                changes, = np.where(flid[1:] - flid[:-1])
                starts = np.hstack(([0], changes + 1))
                stops = np.hstack((starts[1:], [len(flid)]))
                flashes = [Flash(ordered_data[lo:hi])
                           for lo, hi in zip(starts, stops)]
                print("finished non-singletons")

                # Now deal with the singleton points. Each one becomes its
                # own flash with an id above every clustered flash id.
                # Boolean-mask indexing returns a copy, which is why the
                # relabelled rows are written back into ``data`` below.
                singleton = data[singles]
                n_singles = singleton.shape[0]

                # Assign ids outright rather than adding to the -1
                # sentinel: adding made the first singleton reuse
                # max_flash_id, the id of the last clustered flash. With no
                # clustered flashes the numbering still starts at 0.
                first_single_id = max_flash_id + 1 if have_clustered else 0
                singleton['flash_id'] = (
                    first_single_id + np.arange(n_singles, dtype=int))

                # Slice (not index) so each Flash gets a length-1 array.
                singleton_flashes = [Flash(singleton[i:i + 1])
                                     for i in range(n_singles)]
                data[singles] = singleton
                print("finished singletons")

                flashes += singleton_flashes

                logger.info(
                    "Calculating flash initation, centroid, area, etc. for %d flashes",
                    len(flashes))

                if flashes:
                    # Same header text for every flash; build it once.
                    header = ''.join(lma.header)
                    for fl in flashes:
                        fl.metadata = FlashMetadata(header)
                        calculate_flash_stats(fl)

                logger.info('finished setting flash metadata')

            lma.raw_data = lma.data
            lma.data = data
            # Expected to hold because the relabelled singletons were
            # written back into ``data`` above.
            assert (lma.data['flash_id'].min() >= 0)

            lma.sort_status = 'got some flashes'

    except GeneratorExit:
        lma.flash_objects = flashes