Esempio n. 1
0
def collect_output(datafile, min_points=1, mask_length=4):
    """ collect all output from the flash program output directory created by
        the flash program in flash_dir and calculate some additional stats on each flash        
        """
    import numpy as np

    logger = logging.getLogger('FlashAutorunLogger')

    # outdir = os.path.join(flash_dir,flash_output_dir)
    # os.chdir(outdir)

    lma = LMAdataFile(datafile, mask_length=mask_length)

    # get the mapping from flash_ids to the points
    order = np.argsort(lma.flash_id)

    # In the case of no data in the file, lma.data.shape will have length zero, i.e., a 0-d array
    if len(lma.data.shape) == 0:
        # No data
        flashes = []
    else:
        flid = lma.flash_id[order]
        boundaries, = np.where(flid[1:] -
                               flid[:-1])  # where indices are nonzero
        boundaries = np.hstack(([0], boundaries + 1))

        all_data = lma.data[order]

        max_idx = len(flid)  #- 1
        slice_lower_edges = tuple(boundaries)
        slice_upper_edges = slice_lower_edges[1:] + (max_idx, )
        slices = zip(slice_lower_edges, slice_upper_edges)

        flashes = [Flash(all_data[slice(*sl)]) for sl in slices]

        # calculate extra flash metadata, e.g., initation, centroid
        logtext = "Calculating flash initation, centroid, area, etc. for %d flashes" % (
            len(flashes), )
        logger.info(logtext)
        # print flashes[0].points.dtype
        for fl in flashes:
            header = ''.join(lma.header)
            fl.metadata = FlashMetadata(header)
            calculate_flash_stats(fl)

    return lma, flashes
Esempio n. 2
0
def collect_output(datafile, min_points=1, mask_length=4):
    """ collect all output from the flash program output directory created by
        the flash program in flash_dir and calculate some additional stats on each flash        
        """
    import numpy as np
    
    logger = logging.getLogger('FlashAutorunLogger')
    
    # outdir = os.path.join(flash_dir,flash_output_dir)
    # os.chdir(outdir)
    
    lma = LMAdataFile(datafile, mask_length=mask_length)
    
    # get the mapping from flash_ids to the points
    order = np.argsort(lma.flash_id)
    
    # In the case of no data in the file, lma.data.shape will have length zero, i.e., a 0-d array
    if len(lma.data.shape) == 0:
        # No data
        flashes = []
    else:
        flid = lma.flash_id[order]
        boundaries, = np.where(flid[1:]-flid[:-1])    # where indices are nonzero
        boundaries = np.hstack(([0], boundaries+1))
    
        all_data = lma.data[order]
    
        max_idx = len(flid) #- 1
        slice_lower_edges = tuple(boundaries)
        slice_upper_edges = slice_lower_edges[1:] + (max_idx,)
        slices = zip(slice_lower_edges, slice_upper_edges)
        
        flashes = [ Flash(all_data[slice(*sl)]) for sl in slices ]
    
        # calculate extra flash metadata, e.g., initation, centroid
        logtext = "Calculating flash initation, centroid, area, etc. for %d flashes" % (len(flashes), )
        logger.info(logtext)
        # print flashes[0].points.dtype
        for fl in flashes:
            header = ''.join(lma.header)
            fl.metadata = FlashMetadata(header)
            calculate_flash_stats(fl)
                    
    return lma, flashes
Esempio n. 3
0
def create_flash_objs(lma, good_data):
    """ lma is an LMAdataFile object. Its data instance gets overwritten with the sorted, qc'd, flash_id'd data.
    
        very similar to collect_output in autorun_mflash
        
    """
    logger = logging.getLogger('FlashAutorunLogger')
    
    
    try:
        while True:
            (unique_labels, point_labels, all_IDs) = (yield)
            
            # add flash_id column
            empty_labels = np.empty_like(point_labels)
            data = append_fields(good_data, ('flash_id',), (empty_labels,))

            # all_IDs gives the index in the original data array to
            # which each point_label corresponds
            data['flash_id'][all_IDs] = point_labels
            
            # In the case of no data in the file, lma.data.shape will have
            # length zero, i.e., a 0-d array
            if len(data.shape) == 0:
                # No data
                flashes = []
            else:
                # work first with non-singleton flashes 
                # to have strictly positive flash ids
                print data.shape
                singles = (data['flash_id'] == -1)
                non_singleton = data[ np.logical_not(singles) ]
                print non_singleton['flash_id'].shape
                order = np.argsort(non_singleton['flash_id'])
                
                ordered_data = non_singleton[order]
                flid = ordered_data['flash_id']                
                if (flid.shape[0]>0):
                    max_flash_id = flid[-1]
                else: 
                    max_flash_id = 0
                try:
                    assert max_flash_id == max(unique_labels)
                except AssertionError:
                    print "Max flash ID {0} is not as expected from unique labels {1}".format(max_flash_id, max(unique_labels))
                    
                boundaries, = np.where(flid[1:]-flid[:-1])    # where indices are nonzero
                boundaries = np.hstack(([0], boundaries+1))
                
                max_idx = len(flid) #- 1
                slice_lower_edges = tuple(boundaries)
                slice_upper_edges = slice_lower_edges[1:] + (max_idx,)
                slices = zip(slice_lower_edges, slice_upper_edges)

                flashes = [ Flash(ordered_data[slice(*sl)]) for sl in slices ]
                
                print "finished non-singletons"
                
                # Now deal with the nonsingleton points. 
                # Each singleton point will have a high flash_id,
                # starting with the previous maximum flash id.
                singleton = data[singles]
                n_singles = singleton.shape[0]

                # this operation works on a view of the original data array, 
                # so it modifies the original data array
                singleton['flash_id'] += max_flash_id + 1 + np.arange(n_singles, dtype=int)
                
                singleton_flashes = [ Flash(singleton[i:i+1]) for i in range(n_singles)]
                
                data[singles] = singleton
                print "finished singletons"
                
                flashes += singleton_flashes
                
                logtext = "Calculating flash initation, centroid, area, etc. for %d flashes" % (len(flashes), )
                logger.info(logtext)
                # print flashes[0].points.dtype
                for fl in flashes:
                    header = ''.join(lma.header)
                    fl.metadata = FlashMetadata(header)
                    calculate_flash_stats(fl)
                    # logger.info(fl.points.shape[0])
                logger.info('finished setting flash metadata')
                
                lma.raw_data = lma.data
                lma.data = data
                assert (lma.data['flash_id'].min() >=0) # this should be true since the singletons were modified in the original data array above
                lma.sort_status = 'got some flashes'
                
    except GeneratorExit:
        lma.flash_objects = flashes
Esempio n. 4
0
def create_flash_objs(lma, good_data):
    """ lma is an LMAdataFile object. Its data instance gets overwritten with the sorted, qc'd, flash_id'd data.
    
        very similar to collect_output in autorun_mflash
        
    """
    logger = logging.getLogger('FlashAutorunLogger')
    
    
    try:
        while True:
            (unique_labels, point_labels, all_IDs) = (yield)
            
            # add flash_id column
            empty_labels = np.empty_like(point_labels)
            data = append_fields(good_data, ('flash_id',), (empty_labels,))

            # all_IDs gives the index in the original data array to
            # which each point_label corresponds
            data['flash_id'][all_IDs] = point_labels
            
            # In the case of no data in the file, lma.data.shape will have
            # length zero, i.e., a 0-d array
            if len(data.shape) == 0:
                # No data
                flashes = []
            else:
                # work first with non-singleton flashes 
                # to have strictly positive flash ids
                print data.shape
                singles = (data['flash_id'] == -1)
                non_singleton = data[ np.logical_not(singles) ]
                print non_singleton['flash_id'].shape
                order = np.argsort(non_singleton['flash_id'])
                
                ordered_data = non_singleton[order]
                flid = ordered_data['flash_id']                
                if (flid.shape[0]>0):
                    max_flash_id = flid[-1]
                else: 
                    max_flash_id = 0
                try:
                    assert max_flash_id == max(unique_labels)
                except AssertionError:
                    print "Max flash ID {0} is not as expected from unique labels {1}".format(max_flash_id, max(unique_labels))
                    
                boundaries, = np.where(flid[1:]-flid[:-1])    # where indices are nonzero
                boundaries = np.hstack(([0], boundaries+1))
                
                max_idx = len(flid) #- 1
                slice_lower_edges = tuple(boundaries)
                slice_upper_edges = slice_lower_edges[1:] + (max_idx,)
                slices = zip(slice_lower_edges, slice_upper_edges)

                flashes = [ Flash(ordered_data[slice(*sl)]) for sl in slices ]
                
                print "finished non-singletons"
                
                # Now deal with the nonsingleton points. 
                # Each singleton point will have a high flash_id,
                # starting with the previous maximum flash id.
                singleton = data[singles]
                n_singles = singleton.shape[0]

                # this operation works on a view of the original data array, 
                # so it modifies the original data array
                singleton['flash_id'] += max_flash_id + 1 + np.arange(n_singles, dtype=int)
                
                singleton_flashes = [ Flash(singleton[i:i+1]) for i in range(n_singles)]
                
                data[singles] = singleton
                print "finished singletons"
                
                flashes += singleton_flashes
                
                logtext = "Calculating flash initation, centroid, area, etc. for %d flashes" % (len(flashes), )
                logger.info(logtext)
                # print flashes[0].points.dtype
                for fl in flashes:
                    header = ''.join(lma.header)
                    fl.metadata = FlashMetadata(header)
                    calculate_flash_stats(fl)
                    # logger.info(fl.points.shape[0])
                logger.info('finished setting flash metadata')
                
                lma.raw_data = lma.data
                lma.data = data
                assert (lma.data['flash_id'].min() >=0) # this should be true since the singletons were modified in the original data array above
                lma.sort_status = 'got some flashes'
                
    except GeneratorExit:
        lma.flash_objects = flashes