def cluster_events(events, testfunc, clusterfunc, sortfunc = None, bailoutfunc = None, verbose = False):
    """
    Cluster the events in an event list.  testfunc will be passed a
    pair of events in random order, and must return 0 (or False) if
    they should be clustered.  clusterfunc will be passed a pair of
    events in random order, and must return an event that is the
    "cluster" of the two.  clusterfunc is free to return a new event,
    or modify one or the other of its parameters in place and return
    it.

    If sortfunc and bailoutfunc are both not None (if one is provided
    the other must be as well), the events will be sorted into
    "increasing" order using sortfunc as a comparison operator, and
    then only pairs of events for which bailoutfunc returns 0 (or
    False) will be considered for clustering.

    The return value is True if the events in the event list were
    modified, and False if they were not (although their order might
    have changed).
    """
    # changed records whether any pass has modified the event list
    changed = False
    while True:
        if verbose:
            print >>sys.stderr, "clustering pass:"
        # only sort when the caller supplied a comparison function;
        # without one the order of the list is left untouched
        if sortfunc is not None:
            if verbose:
                print >>sys.stderr, "\tsorting ..."
            events.sort(sortfunc)

        # outer_did_cluster records whether this pass merged anything
        outer_did_cluster = False
        i = 0
        while i < len(events):
            if events[i] is None:
                # already absorbed into an earlier event
                i += 1
                continue
            if verbose and not (i % 13):
                print >>sys.stderr, "\t%d / %d%s\r" % (i + 1, len(events), " " * (int(math.floor(math.log10(len(events) or 1))) + 1)),

            # inner_did_cluster records whether events[i] changed
            inner_did_cluster = False
            # the list length is fixed during a pass (absorbed events
            # are replaced by None, not removed), so it is read once
            j = i + 1
            n_events = len(events)
            while j < n_events:
                if events[j] is not None:
                    if not testfunc(events[i], events[j]):
                        events[i] = clusterfunc(events[i], events[j])
                        events[j] = None
                        inner_did_cluster = True
                    elif (sortfunc is not None) and bailoutfunc(events[i], events[j]):
                        # sorted input:  nothing further along can
                        # cluster with events[i]
                        break
                j += 1

            if inner_did_cluster:
                # events[i] changed, rescan from the same i
                outer_did_cluster = True
            else:
                i += 1

        if verbose:
            print >>sys.stderr, "\t%d / %d%s" % (len(events), len(events), " " * (int(math.floor(math.log10(len(events) or 1))) + 1))

        # repeat until a pass leaves the list unchanged
        if not outer_did_cluster:
            if verbose:
                print >>sys.stderr, "\tno change"
            break

        # drop the placeholders left behind by absorbed events
        iterutils.inplace_filter(lambda event: event is not None, events)
        changed = True
    return changed
def remove_too_short_segments(seglistdict, timing_params):
    """
    Filter every segment list in seglistdict, keeping only the
    segments for which segment_ok(timing_params, seg) is true.

    CAUTION:  the segment lists in seglistdict are modified in place.
    """
    def is_analyzable(seg):
        return segment_ok(timing_params, seg)

    for seglist in seglistdict.values():
        iterutils.inplace_filter(is_analyzable, seglist)
def remove_too_short_segments(seglists, min_segment_length, pad):
    """
    Filter every segment list in the segmentlistdict seglists, keeping
    only the segments for which
    segment_ok(seg, min_segment_length, pad) is true.

    CAUTION:  seglists is modified in place.
    """
    def is_analyzable(seg):
        return segment_ok(seg, min_segment_length, pad)

    for seglist in seglists.values():
        iterutils.inplace_filter(is_analyzable, seglist)
def remove_too_short_segments(seglists, min_segment_length, pad):
    """
    Discard, from each segment list held by the segmentlistdict
    seglists, the segments rejected by
    segment_ok(seg, min_segment_length, pad).

    CAUTION:  the filtering is done in place on seglists.
    """
    # the predicate does not depend on the list being filtered, so a
    # single instance is shared by all of them
    keep = lambda seg: segment_ok(seg, min_segment_length, pad)
    for seglist in seglists.values():
        iterutils.inplace_filter(keep, seglist)
def from_ligolw(filepath, table_name, columns=None, start=None, end=None, **kwargs):
    """Load a LIGO_LW table from a file.

    @param filepath
        path to `LIGO_LW` XML file
    @param table_name
        name of the requested `LIGO_LW` table
    @param columns
        a list of valid `LIGO_LW` column names for the new table
        (defaults to all)
    @param start
        minimum GPS time for returned triggers (inclusive); `None`
        means no lower bound
    @param end
        maximum GPS time for returned triggers (exclusive); `None`
        means no upper bound
    @param kwargs
        accepted but not used by this function

    @returns the requested `LIGO_LW` table.
    """
    table_name = which_table(table_name)
    # restrict the columns that get loaded by temporarily overriding the
    # class-level loadcolumns attribute of the table type
    if columns:
        TableType = lsctables.TableByName[table_name]
        _oldcols = TableType.loadcolumns
        TableType.loadcolumns = columns
    try:
        # load file
        xmldoc = ligolw_utils.load_filename(filepath)
        out = ligolw_table.get_table(xmldoc, table_name)
        # compare against None explicitly so that a bound of 0 is honoured
        if start is not None or end is not None:
            get_time = time_func(table_name)
            if start is None:
                start = segments.NegInfinity
            if end is None:
                end = segments.PosInfinity
            keep = lambda row: ((start <= float(get_time(row))) and
                                (get_time(row) < end))
            iterutils.inplace_filter(keep, out)
    finally:
        # loadcolumns is shared state on the table class: put it back
        # even when loading or filtering fails
        if columns:
            TableType.loadcolumns = _oldcols
    return out
def ReadSnglInspiralFromFiles(fileList, verbose=False, filterFunc=None):
    """
    Read the SnglInspiralTables from a list of files.
    If filterFunc is not None, only keep triggers for which filterFunc
    evaluates to True.  Ex.: filterFunc=lambda sng: sng.snr >= 6.0

    @param fileList: list of input files
    @param verbose: print progress
    @param filterFunc: optional predicate applied to each trigger
    @return: a new SnglInspiralTable holding the (filtered) triggers of
        all the files; files without a sngl_inspiral table contribute
        nothing
    """
    # NOTE: this function no longer carries out event ID mangling (AKA
    # reassignment). Please adjust calling codes accordingly!
    # This means that identical event IDs produced by lalapps_thinca in
    # non-slide files having the same GPS start time will stay identical,
    # affecting zerolag and injection runs made over the same data.
    #
    # In consequence, if the calling code is going to reconstruct coincs
    # from the sngl event IDs, and if these include multiple injection
    # runs, coinc finding should be done one file at a time - see the
    # readCoincInspiralFromFiles function in CoincInspiralUtils.py

    sngls = lsctables.New(lsctables.SnglInspiralTable,
        columns=lsctables.SnglInspiralTable.loadcolumns)

    lsctables.use_in(ExtractSnglInspiralTableLIGOLWContentHandler)
    n_files = len(fileList)
    for i, filename in enumerate(fileList):
        if verbose:
            # single parenthesised argument: prints the same text as the
            # print statement
            print(str(i + 1) + "/" + str(n_files) + ": ")
        xmldoc = utils.load_filename(filename, verbose=verbose,
            contenthandler=ExtractSnglInspiralTableLIGOLWContentHandler)
        try:
            try:
                sngl_table = lsctables.SnglInspiralTable.get_table(xmldoc)
            except ValueError:
                # some xml files have no sngl table, that's OK
                sngl_table = None
            else:
                # kept outside the try body so that a ValueError raised
                # by filterFunc is reported instead of silently
                # discarding every trigger in this file
                if filterFunc is not None:
                    iterutils.inplace_filter(filterFunc, sngl_table)
            if sngl_table:
                sngls.extend(sngl_table)
        finally:
            # free memory, also when something above failed
            xmldoc.unlink()

    return sngls
def ReadSnglInspiralFromFiles(fileList, verbose=False, filterFunc=None):
    """
    Read the SnglInspiralTables from a list of files.
    If filterFunc is not None, only keep triggers for which filterFunc
    evaluates to True.  Ex.: filterFunc=lambda sng: sng.snr >= 6.0

    @param fileList: list of input files
    @param verbose: print progress
    @param filterFunc: optional predicate applied to each trigger
    @return: a new SnglInspiralTable holding the (filtered) triggers of
        all the files; files without a sngl_inspiral table contribute
        nothing
    """
    # NOTE: this function no longer carries out event ID mangling (AKA
    # reassignment). Please adjust calling codes accordingly!
    # This means that identical event IDs produced by lalapps_thinca in
    # non-slide files having the same GPS start time will stay identical,
    # affecting zerolag and injection runs made over the same data.
    #
    # In consequence, if the calling code is going to reconstruct coincs
    # from the sngl event IDs, and if these include multiple injection
    # runs, coinc finding should be done one file at a time - see the
    # readCoincInspiralFromFiles function in CoincInspiralUtils.py

    sngls = lsctables.New(lsctables.SnglInspiralTable,
        columns=lsctables.SnglInspiralTable.loadcolumns)

    lsctables.use_in(ExtractSnglInspiralTableLIGOLWContentHandler)
    total = len(fileList)
    for i, path in enumerate(fileList):
        if verbose:
            # single parenthesised argument: prints the same text as the
            # print statement
            print(str(i + 1) + "/" + str(total) + ": ")
        xmldoc = utils.load_filename(path, verbose=verbose,
            contenthandler=ExtractSnglInspiralTableLIGOLWContentHandler)
        try:
            try:
                sngl_table = lsctables.SnglInspiralTable.get_table(xmldoc)
            except ValueError:
                # some xml files have no sngl table, that's OK
                sngl_table = None
            else:
                # the filter runs outside the try body: a ValueError
                # coming from filterFunc must not be mistaken for a
                # missing table
                if filterFunc is not None:
                    iterutils.inplace_filter(filterFunc, sngl_table)
            if sngl_table:
                sngls.extend(sngl_table)
        finally:
            # free memory, also when something above failed
            xmldoc.unlink()

    return sngls
def ligolw_thinca(
    xmldoc,
    process_id,
    coinc_definer_row,
    event_comparefunc,
    thresholds,
    ntuple_comparefunc = default_ntuple_comparefunc,
    effective_snr_factor = 250.0,
    veto_segments = None,
    trigger_program = u"inspiral",
    likelihood_func = None,
    likelihood_params_func = None,
    verbose = False,
    max_dt = None
):
    """
    Build event lists from the sngl_inspiral table of xmldoc, remove
    vetoed events, iterate over the coincidences produced by
    snglcoinc.TimeSlideGraph.get_coincs() and append to the
    coincidence tables every coincidence for which ntuple_comparefunc
    returns a false value.

    If max_dt is None it is obtained from inspiral_max_dt().  If
    veto_segments is not None, an event is discarded when
    veto_segments has an entry for its ifo and the event's get_end()
    lies in that entry.

    Returns xmldoc.
    """
    #
    # set up the interface to the coincidence tables
    #

    if verbose:
        print >>sys.stderr, "indexing ..."
    coinc_tables = InspiralCoincTables(xmldoc, vetoes = veto_segments, program = trigger_program, likelihood_func = likelihood_func, likelihood_params_func = likelihood_params_func)
    coinc_def_id = ligolw_coincs.get_coinc_def_id(xmldoc, coinc_definer_row.search, coinc_definer_row.search_coinc_type, create_new = True, description = coinc_definer_row.description)
    sngl_index = {}
    for row in lsctables.SnglInspiralTable.get_table(xmldoc):
        sngl_index[row.event_id] = row

    #
    # event list accessors, holding the events from the processes
    # that can take part in a coincidence.  vetoes are applied by
    # deleting the events that fall inside vetoed segments
    #

    eventlists = snglcoinc.make_eventlists(xmldoc, InspiralEventList, lsctables.SnglInspiralTable.tableName)
    if veto_segments is not None:
        def not_vetoed(event):
            return not (event.ifo in veto_segments and event.get_end() in veto_segments[event.ifo])

        for eventlist in eventlists.values():
            iterutils.inplace_filter(not_vetoed, eventlist)

    #
    # \Delta t parameter for all the event lists
    #

    if max_dt is None:
        max_dt = inspiral_max_dt(lsctables.SnglInspiralTable.get_table(xmldoc), thresholds)
    if verbose:
        print >>sys.stderr, "event bisection search window will be %.16g s" % max_dt
    for eventlist in eventlists.values():
        eventlist.set_dt(max_dt)

    #
    # one copy of the ethinca parameter for every possible instrument
    # pair
    #

    thresholds = replicate_threshold(thresholds, set(eventlists))

    #
    # offset vector assembly graph
    #

    time_slide_graph = snglcoinc.TimeSlideGraph(coinc_tables.time_slide_index, verbose = verbose)

    #
    # walk over all coincidences, apply the final n-tuple compare
    # func and record the survivors
    #

    for node, event_ids in time_slide_graph.get_coincs(eventlists, event_comparefunc, thresholds, verbose = verbose):
        sngls = tuple(sngl_index[event_id] for event_id in event_ids)
        if ntuple_comparefunc(sngls, node.offset_vector):
            # rejected by the n-tuple test
            continue
        coinc_tables.append_coinc(process_id, node.time_slide_id, coinc_def_id, sngls, effective_snr_factor)

    #
    # take the time offsets off the events
    #

    del eventlists.offsetvector

    #
    # done
    #

    return xmldoc
def revert(xmldoc, program=process_program_name, verbose=False):
    """
    Strip from xmldoc the rows tied to the processes returned by the
    process table's get_ids_by_program(program):  their process and
    process_params rows, their coinc_event rows and the matching
    coinc_event_map rows, and then the time_slide and coinc_definer
    rows that belong to them and are not referenced by any remaining
    coinc_event row.

    All tables are filtered in place; nothing is returned.
    """
    #
    # process metadata tables
    #

    if verbose:
        print >> sys.stderr, "removing process metadata ..."
    process_table = lsctables.ProcessTable.get_table(xmldoc)
    # IDs of things to delete
    process_ids = process_table.get_ids_by_program(program)

    def from_other_process(row):
        return row.process_id not in process_ids

    iterutils.inplace_filter(from_other_process, process_table)
    iterutils.inplace_filter(from_other_process, lsctables.ProcessParamsTable.get_table(xmldoc))

    #
    # coinc_event and coinc_event_map tables
    #

    if verbose:
        print >> sys.stderr, "removing coincs ..."
    coinc_event_table = lsctables.CoincTable.get_table(xmldoc)
    # IDs of things to delete
    coinc_ids = frozenset(row.coinc_event_id for row in coinc_event_table if row.process_id in process_ids)

    def is_surviving_coinc(row):
        return row.coinc_event_id not in coinc_ids

    iterutils.inplace_filter(is_surviving_coinc, coinc_event_table)
    iterutils.inplace_filter(is_surviving_coinc, lsctables.CoincMapTable.get_table(xmldoc))
    # IDs of things to keep:  whatever the surviving coincs still
    # reference (must be collected after the filtering above)
    time_slide_ids = frozenset(row.time_slide_id for row in coinc_event_table)
    coinc_def_ids = frozenset(row.coinc_def_id for row in coinc_event_table)

    #
    # time_slide and coinc_definer tables
    #

    if verbose:
        print >> sys.stderr, "removing coinc metadata ..."
    # coinc types to delete
    coinc_defs = frozenset((row.search, row.search_coinc_type) for row in (InspiralSICoincDef, InspiralSCNearCoincDef, InspiralSCExactCoincDef))

    def keep_time_slide(row):
        return row.process_id not in process_ids or row.time_slide_id in time_slide_ids

    def keep_coinc_def(row):
        return (row.search, row.search_coinc_type) not in coinc_defs or row.coinc_def_id in coinc_def_ids

    iterutils.inplace_filter(keep_time_slide, lsctables.TimeSlideTable.get_table(xmldoc))
    iterutils.inplace_filter(keep_coinc_def, lsctables.CoincDefTable.get_table(xmldoc))
def ligolw_rinca(
    xmldoc,
    process_id,
    EventListType,
    CoincTables,
    coinc_definer_row,
    event_comparefunc,
    thresholds,
    ntuple_comparefunc = lambda events, offset_vector: False,
    small_coincs = False,
    veto_segments = None,
    coinc_end_time_segment = None,
    verbose = False
):
    """
    Build event lists from the sngl_ringdown table of xmldoc, remove
    vetoed events, iterate over the coincidences produced by
    snglcoinc.TimeSlideGraph.get_coincs() and append to the
    coincidence tables every coincidence for which ntuple_comparefunc
    returns a false value.

    small_coincs is forwarded to get_coincs() as include_small_coincs.
    If veto_segments is not None, an event is discarded when
    veto_segments has an entry for its ifo and the event's
    get_start() lies in that entry.  coinc_end_time_segment is
    accepted but not used in this function.

    Returns xmldoc.
    """
    #
    # set up the interface to the coincidence tables
    #

    if verbose:
        print >>sys.stderr, "indexing ..."
    coinc_tables = CoincTables(xmldoc, vetoes = veto_segments)
    coinc_def_id = llwapp.get_coinc_def_id(xmldoc, coinc_definer_row.search, coinc_definer_row.search_coinc_type, create_new = True, description = coinc_definer_row.description)
    sngl_index = {}
    for row in lsctables.table.get_table(xmldoc, lsctables.SnglRingdownTable.tableName):
        sngl_index[row.event_id] = row

    #
    # event list accessors, holding the events from the processes
    # that can take part in a coincidence.  vetoes are applied by
    # deleting the events that fall inside vetoed segments
    #

    eventlists = snglcoinc.make_eventlists(xmldoc, EventListType, lsctables.SnglRingdownTable.tableName)
    if veto_segments is not None:
        def not_vetoed(event):
            return not (event.ifo in veto_segments and event.get_start() in veto_segments[event.ifo])

        for eventlist in eventlists.values():
            iterutils.inplace_filter(not_vetoed, eventlist)

    #
    # \Delta t parameter for all the event lists
    #

    max_dt = ringdown_max_dt(lsctables.table.get_table(xmldoc, lsctables.SnglRingdownTable.tableName), thresholds)
    if verbose:
        print >>sys.stderr, "event bisection search window will be %.16g s" % max_dt
    for eventlist in eventlists.values():
        eventlist.set_dt(max_dt)

    #
    # one copy of the ds_sq threshold for every possible instrument
    # pair
    #

    thresholds = replicate_threshold(thresholds, set(eventlists))

    #
    # offset vector assembly graph
    #

    time_slide_graph = snglcoinc.TimeSlideGraph(coinc_tables.time_slide_index, verbose = verbose)

    #
    # walk over all coincidences, apply the final n-tuple compare
    # func and record the survivors
    #

    for node, event_ids in time_slide_graph.get_coincs(eventlists, event_comparefunc, thresholds, include_small_coincs = small_coincs, verbose = verbose):
        ntuple = tuple(sngl_index[event_id] for event_id in event_ids)
        if ntuple_comparefunc(ntuple, node.offset_vector):
            # rejected by the n-tuple test
            continue
        coinc_tables.append_coinc(process_id, node, coinc_def_id, ntuple)

    #
    # take the time offsets off the events
    #

    del eventlists.offsetvector

    #
    # done
    #

    return xmldoc
def revert(xmldoc, program = process_program_name, verbose = False):
    """
    Undo the additions made to xmldoc by the processes that the
    process table's get_ids_by_program(program) returns.  Removed, in
    this order:  their process and process_params rows; their
    coinc_event rows and the coinc_event_map rows with the same
    coinc_event_id; their time_slide rows and the coinc_definer rows
    of the three inspiral coinc types, except those still referenced
    by a remaining coinc_event row.

    The tables are edited in place and there is no return value.
    """
    #
    # process metadata
    #

    if verbose:
        print >>sys.stderr, "removing process metadata ..."
    process_table = lsctables.ProcessTable.get_table(xmldoc)
    # IDs of things to delete
    process_ids = process_table.get_ids_by_program(program)
    not_ours = lambda row: row.process_id not in process_ids
    iterutils.inplace_filter(not_ours, process_table)
    iterutils.inplace_filter(not_ours, lsctables.ProcessParamsTable.get_table(xmldoc))

    #
    # coinc_event and coinc_event_map
    #

    if verbose:
        print >>sys.stderr, "removing coincs ..."
    coinc_event_table = lsctables.CoincTable.get_table(xmldoc)
    # IDs of things to delete
    coinc_ids = frozenset(row.coinc_event_id for row in coinc_event_table if row.process_id in process_ids)
    coinc_survives = lambda row: row.coinc_event_id not in coinc_ids
    iterutils.inplace_filter(coinc_survives, coinc_event_table)
    iterutils.inplace_filter(coinc_survives, lsctables.CoincMapTable.get_table(xmldoc))
    # IDs of things to keep.  gathered from the already-filtered
    # coinc_event table, i.e. only what is still referenced
    time_slide_ids = frozenset(row.time_slide_id for row in coinc_event_table)
    coinc_def_ids = frozenset(row.coinc_def_id for row in coinc_event_table)

    #
    # time_slide and coinc_definer
    #

    if verbose:
        print >>sys.stderr, "removing coinc metadata ..."
    # coinc types to delete
    coinc_defs = frozenset((row.search, row.search_coinc_type) for row in (InspiralSICoincDef, InspiralSCNearCoincDef, InspiralSCExactCoincDef))
    time_slide_survives = lambda row: row.process_id not in process_ids or row.time_slide_id in time_slide_ids
    coinc_def_survives = lambda row: (row.search, row.search_coinc_type) not in coinc_defs or row.coinc_def_id in coinc_def_ids
    iterutils.inplace_filter(time_slide_survives, lsctables.TimeSlideTable.get_table(xmldoc))
    iterutils.inplace_filter(coinc_def_survives, lsctables.CoincDefTable.get_table(xmldoc))
def ligolw_rinca(xmldoc,
                 process_id,
                 EventListType,
                 CoincTables,
                 coinc_definer_row,
                 event_comparefunc,
                 thresholds,
                 ntuple_comparefunc=lambda events, offset_vector: False,
                 small_coincs=False,
                 veto_segments=None,
                 coinc_end_time_segment=None,
                 verbose=False):
    """
    Build event lists from the sngl_ringdown table of xmldoc, remove
    vetoed events, iterate over the coincidences produced by
    snglcoinc.TimeSlideGraph.get_coincs() and append to the
    coincidence tables every coincidence that passes the size cut and
    for which ntuple_comparefunc returns a false value.

    Size cut:  coincidences with fewer than 2 events are always
    skipped; coincidences with fewer events than the node's offset
    vector has entries are skipped unless small_coincs is true.  If
    veto_segments is not None, an event is discarded when
    veto_segments has an entry for its ifo and the event's
    get_start() lies in that entry.  coinc_end_time_segment is
    accepted but not used in this function.

    Returns xmldoc.
    """
    #
    # set up the interface to the coincidence tables
    #

    if verbose:
        print >> sys.stderr, "indexing ..."
    coinc_tables = CoincTables(xmldoc, vetoes=veto_segments)
    coinc_def_id = llwapp.get_coinc_def_id(
        xmldoc,
        coinc_definer_row.search,
        coinc_definer_row.search_coinc_type,
        create_new=True,
        description=coinc_definer_row.description)
    sngl_index = {}
    for row in lsctables.SnglRingdownTable.get_table(xmldoc):
        sngl_index[row.event_id] = row

    #
    # event list accessors, holding the events from the processes
    # that can take part in a coincidence.  vetoes are applied by
    # deleting the events that fall inside vetoed segments
    #

    eventlists = snglcoinc.EventListDict(
        EventListType, lsctables.SnglRingdownTable.get_table(xmldoc))
    if veto_segments is not None:
        def not_vetoed(event):
            return not (event.ifo in veto_segments and
                        event.get_start() in veto_segments[event.ifo])

        for eventlist in eventlists.values():
            iterutils.inplace_filter(not_vetoed, eventlist)

    #
    # \Delta t parameter for all the event lists
    #

    max_dt = ringdown_max_dt(lsctables.SnglRingdownTable.get_table(xmldoc),
                             thresholds)
    if verbose:
        print >> sys.stderr, "event bisection search window will be %.16g s" % max_dt
    for eventlist in eventlists.values():
        eventlist.set_dt(max_dt)

    #
    # one copy of the ds_sq threshold for every possible instrument
    # pair
    #

    thresholds = replicate_threshold(thresholds, set(eventlists))

    #
    # offset vector assembly graph
    #

    time_slide_graph = snglcoinc.TimeSlideGraph(coinc_tables.time_slide_index,
                                                verbose=verbose)

    #
    # walk over all coincidences, apply the final n-tuple compare
    # func and record the survivors
    #

    for node, event_ids in time_slide_graph.get_coincs(eventlists,
                                                       event_comparefunc,
                                                       thresholds,
                                                       verbose=verbose):
        # size cut
        if len(event_ids) < 2:
            continue
        if len(event_ids) < len(node.offset_vector) and not small_coincs:
            continue
        ntuple = tuple(sngl_index[event_id] for event_id in event_ids)
        if ntuple_comparefunc(ntuple, node.offset_vector):
            # rejected by the n-tuple test
            continue
        new_rows = coinc_tables.coinc_rows(process_id, node, coinc_def_id,
                                           ntuple)
        coinc_tables.append_coinc(*new_rows)

    #
    # take the time offsets off the events
    #

    del eventlists.offsetvector

    #
    # done
    #

    return xmldoc
def ligolw_thinca(xmldoc,
                  process_id,
                  coinc_definer_row,
                  event_comparefunc,
                  thresholds,
                  max_dt,
                  ntuple_comparefunc=default_ntuple_comparefunc,
                  veto_segments=None,
                  trigger_program=u"inspiral",
                  likelihood_func=None,
                  likelihood_params_func=None,
                  min_instruments=2,
                  min_log_L=None,
                  verbose=False):
    """
    Build event lists from the sngl_inspiral table of xmldoc, remove
    vetoed events, iterate over the coincidences produced by
    snglcoinc.TimeSlideGraph.get_coincs() and append to the
    coincidence tables every coincidence that survives the cuts.

    Cuts, in order:  coincidences with fewer than min_instruments
    events are skipped; coincidences for which ntuple_comparefunc
    returns a true value are skipped; if min_log_L is not None,
    coincidences whose coinc row has likelihood < min_log_L are
    skipped.  If veto_segments is not None, an event is discarded when
    veto_segments has an entry for its ifo and event.end lies in that
    entry.

    Raises ValueError if min_instruments < 1, or if min_log_L is given
    without likelihood_func.

    Returns xmldoc.
    """
    #
    # validate input
    #

    if min_instruments < 1:
        raise ValueError("min_instruments (=%d) must be >= 1" %
                         min_instruments)
    if likelihood_func is None and min_log_L is not None:
        raise ValueError("must supply likelihood_func to impose min_log_L cut")

    #
    # set up the interface to the coincidence tables
    #

    if verbose:
        print >> sys.stderr, "indexing ..."
    coinc_tables = InspiralCoincTables(
        xmldoc,
        vetoes=veto_segments,
        program=trigger_program,
        likelihood_func=likelihood_func,
        likelihood_params_func=likelihood_params_func)
    coinc_def_id = ligolw_coincs.get_coinc_def_id(
        xmldoc,
        coinc_definer_row.search,
        coinc_definer_row.search_coinc_type,
        create_new=True,
        description=coinc_definer_row.description)
    sngl_inspiral_table = lsctables.SnglInspiralTable.get_table(xmldoc)
    sngl_index = {}
    for row in sngl_inspiral_table:
        sngl_index[row.event_id] = row

    #
    # event list accessors, holding the events from the processes
    # that can take part in a coincidence.  vetoes are applied by
    # deleting the events that fall inside vetoed segments
    #

    instruments = set(
        coinc_tables.time_slide_table.getColumnByName("instrument"))
    eventlists = snglcoinc.EventListDict(InspiralEventList,
                                         sngl_inspiral_table,
                                         instruments=instruments)
    if veto_segments is not None:
        def not_vetoed(event):
            return not (event.ifo in veto_segments and
                        event.end in veto_segments[event.ifo])

        for eventlist in eventlists.values():
            iterutils.inplace_filter(not_vetoed, eventlist)

    #
    # \Delta t parameter for all the event lists
    #

    if verbose:
        print >> sys.stderr, "event bisection search window will be %.16g s" % max_dt
    for eventlist in eventlists.values():
        eventlist.set_dt(max_dt)

    #
    # one copy of the ethinca parameter for every possible instrument
    # pair
    #

    thresholds = replicate_threshold(thresholds, eventlists)

    #
    # offset vector assembly graph
    #

    time_slide_graph = snglcoinc.TimeSlideGraph(coinc_tables.time_slide_index,
                                                verbose=verbose)

    #
    # walk over all coincidences, apply the final n-tuple compare
    # func and record the survivors
    #

    for node, event_ids in time_slide_graph.get_coincs(eventlists,
                                                       event_comparefunc,
                                                       thresholds,
                                                       verbose=verbose):
        if len(event_ids) < min_instruments:
            continue
        sngls = tuple(sngl_index[event_id] for event_id in event_ids)
        if ntuple_comparefunc(sngls, node.offset_vector):
            # rejected by the n-tuple test
            continue
        coinc, coincmaps, coinc_inspiral = coinc_tables.coinc_rows(
            process_id, node.time_slide_id, coinc_def_id, sngls)
        if min_log_L is not None and not (coinc.likelihood >= min_log_L):
            # fails the likelihood cut
            continue
        coinc_tables.append_coinc(coinc, coincmaps, coinc_inspiral)

    #
    # done
    #

    return xmldoc
def ligolw_thinca(xmldoc,
                  process_id,
                  coinc_definer_row,
                  event_comparefunc,
                  thresholds,
                  ntuple_comparefunc=default_ntuple_comparefunc,
                  effective_snr_factor=250.0,
                  veto_segments=None,
                  trigger_program=u"inspiral",
                  likelihood_func=None,
                  likelihood_params_func=None,
                  verbose=False,
                  max_dt=None):
    """
    Build event lists from the sngl_inspiral table of xmldoc, remove
    vetoed events, iterate over the coincidences produced by
    snglcoinc.TimeSlideGraph.get_coincs() and append to the
    coincidence tables every coincidence for which ntuple_comparefunc
    returns a false value.

    If max_dt is None it is obtained from inspiral_max_dt().  If
    veto_segments is not None, an event is discarded when
    veto_segments has an entry for its ifo and event.end lies in that
    entry.

    Returns xmldoc.
    """
    #
    # set up the interface to the coincidence tables
    #

    if verbose:
        print >> sys.stderr, "indexing ..."
    coinc_tables = InspiralCoincTables(
        xmldoc,
        vetoes=veto_segments,
        program=trigger_program,
        likelihood_func=likelihood_func,
        likelihood_params_func=likelihood_params_func)
    coinc_def_id = ligolw_coincs.get_coinc_def_id(
        xmldoc,
        coinc_definer_row.search,
        coinc_definer_row.search_coinc_type,
        create_new=True,
        description=coinc_definer_row.description)
    sngl_index = {}
    for row in lsctables.SnglInspiralTable.get_table(xmldoc):
        sngl_index[row.event_id] = row

    #
    # event list accessors, holding the events from the processes
    # that can take part in a coincidence.  vetoes are applied by
    # deleting the events that fall inside vetoed segments
    #

    eventlists = snglcoinc.make_eventlists(
        xmldoc, InspiralEventList, lsctables.SnglInspiralTable.tableName)
    if veto_segments is not None:
        def not_vetoed(event):
            return not (event.ifo in veto_segments and
                        event.end in veto_segments[event.ifo])

        for eventlist in eventlists.values():
            iterutils.inplace_filter(not_vetoed, eventlist)

    #
    # \Delta t parameter for all the event lists
    #

    if max_dt is None:
        max_dt = inspiral_max_dt(lsctables.SnglInspiralTable.get_table(xmldoc),
                                 thresholds)
    if verbose:
        print >> sys.stderr, "event bisection search window will be %.16g s" % max_dt
    for eventlist in eventlists.values():
        eventlist.set_dt(max_dt)

    #
    # one copy of the ethinca parameter for every possible instrument
    # pair
    #

    thresholds = replicate_threshold(thresholds, set(eventlists))

    #
    # offset vector assembly graph
    #

    time_slide_graph = snglcoinc.TimeSlideGraph(coinc_tables.time_slide_index,
                                                verbose=verbose)

    #
    # walk over all coincidences, apply the final n-tuple compare
    # func and record the survivors
    #

    for node, event_ids in time_slide_graph.get_coincs(eventlists,
                                                       event_comparefunc,
                                                       thresholds,
                                                       verbose=verbose):
        sngls = tuple(sngl_index[event_id] for event_id in event_ids)
        if ntuple_comparefunc(sngls, node.offset_vector):
            # rejected by the n-tuple test
            continue
        coinc_tables.append_coinc(process_id, node.time_slide_id,
                                  coinc_def_id, sngls, effective_snr_factor)

    #
    # take the time offsets off the events
    #

    del eventlists.offsetvector

    #
    # done
    #

    return xmldoc
def cluster_events(events,
                   testfunc,
                   clusterfunc,
                   sortfunc=None,
                   bailoutfunc=None,
                   verbose=False):
    """
    Cluster the events in an event list.  testfunc will be passed a
    pair of events in random order, and must return 0 (or False) if
    they should be clustered.  clusterfunc will be passed a pair of
    events in random order, and must return an event that is the
    "cluster" of the two.  clusterfunc is free to return a new event,
    or modify one or the other of its parameters in place and return
    it.

    If sortfunc and bailoutfunc are both not None (if one is provided
    the other must be as well), the events will be sorted into
    "increasing" order using sortfunc as a comparison operator, and
    then only pairs of events for which bailoutfunc returns 0 (or
    False) will be considered for clustering.

    The return value is True if the events in the event list were
    modified, and False if they were not (although their order might
    have changed).
    """
    # changed indicates if the event list has changed
    changed = False
    while True:
        if verbose:
            progress = ProgressBar("clustering %d events" % len(events),
                                   max=len(events))
            progress.show()
        else:
            progress = None

        if sortfunc is not None:
            events.sort(sortfunc)

        # outer_did_cluster indicates if the event list changes on
        # this pass
        outer_did_cluster = False
        i = 0
        while i < len(events):
            if progress is not None:
                progress.update(i)
            if events[i] is not None:
                # inner_did_cluster indicates if events[i]
                # has changed
                inner_did_cluster = False
                # scan the remainder of the list by index.  slicing
                # events[i + 1:] would copy the whole tail for every
                # i, costing O(n) even when bailoutfunc stops the
                # scan after a few events
                j = i + 1
                n_events = len(events)
                while j < n_events:
                    event_j = events[j]
                    if event_j is not None:
                        if not testfunc(events[i], event_j):
                            events[i] = clusterfunc(events[i], event_j)
                            events[j] = None
                            inner_did_cluster = True
                        elif (sortfunc
                              is not None) and bailoutfunc(events[i], event_j):
                            break
                    j += 1
                if inner_did_cluster:
                    outer_did_cluster = True
                    # don't advance until events[i]
                    # stops changing
                    continue
            # events[i] has not changed
            i += 1
        del progress
        # repeat until we do a pass without the listing changing
        if not outer_did_cluster:
            break
        # remove the placeholders left by absorbed events
        iterutils.inplace_filter(lambda event: event is not None, events)
        changed = True
    return changed
def cluster_events(events, testfunc, clusterfunc, sortfunc = None, bailoutfunc = None, verbose = False):
    """
    Cluster the events in an event list.  testfunc will be passed a
    pair of events in random order, and must return 0 (or False) if
    they should be clustered.  clusterfunc will be passed a pair of
    events in random order, and must return an event that is the
    "cluster" of the two.  clusterfunc is free to return a new event,
    or modify one or the other of its parameters in place and return
    it.

    If sortfunc and bailoutfunc are both not None (if one is provided
    the other must be as well), the events will be sorted into
    "increasing" order using sortfunc as a comparison operator, and
    then only pairs of events for which bailoutfunc returns 0 (or
    False) will be considered for clustering.

    The return value is True if the events in the event list were
    modified, and False if they were not (although their order might
    have changed).
    """
    # changed indicates if the event list has changed
    changed = False
    while True:
        if verbose:
            progress = ProgressBar("clustering %d events" % len(events), max = len(events))
            progress.show()
        else:
            progress = None

        if sortfunc is not None:
            events.sort(sortfunc)

        # outer_did_cluster indicates if the event list changes on
        # this pass
        outer_did_cluster = False
        i = 0
        while i < len(events):
            if progress is not None:
                progress.update(i)
            if events[i] is not None:
                # inner_did_cluster indicates if events[i]
                # has changed
                inner_did_cluster = False
                # walk the rest of the list by index instead of
                # iterating over the slice events[i + 1:]:  the slice
                # duplicates the entire tail on every outer step,
                # which defeats the early exit that bailoutfunc
                # provides on sorted input
                n_events = len(events)
                j = i + 1
                while j < n_events:
                    event_j = events[j]
                    if event_j is not None:
                        if not testfunc(events[i], event_j):
                            events[i] = clusterfunc(events[i], event_j)
                            events[j] = None
                            inner_did_cluster = True
                        elif (sortfunc is not None) and bailoutfunc(events[i], event_j):
                            break
                    j += 1
                if inner_did_cluster:
                    outer_did_cluster = True
                    # don't advance until events[i]
                    # stops changing
                    continue
            # events[i] has not changed
            i += 1
        del progress
        # repeat until we do a pass without the listing changing
        if not outer_did_cluster:
            break
        # remove the placeholders left by absorbed events
        iterutils.inplace_filter(lambda event: event is not None, events)
        changed = True
    return changed