Beispiel #1
0
def label_peaks_parsimonious(peaks, known_emission_lines, delta):
    '''
    Label peaks using a small set of elements chosen by greedy set cover.

    Out of all the peaks that are within +/-delta of at least one
    known emission line, find a minimal set of elements
    (via unweighted greedy set cover) that "explains" all such peaks.

    Parameters
    ----------
    peaks : iterable
        Peak positions. They are sorted internally and must be hashable,
        since the unlabeled peaks are tracked in a set.
    known_emission_lines : DataFrame-like
        Table with an "elt" column (element label) and a "wav_mars" column
        (line position); it must support ``sort_values`` and ``iterrows``.
    delta : number
        Matching tolerance, forwarded to ``nearby_peaks``.

    Returns
    -------
    cover_elts_to_peaks : OrderedDict
        Maps each chosen element that matches at least one peak to the
        list of peaks it matches, in ascending peak order.
    cover_peaks_to_elts : OrderedDict
        Maps each labeled peak to the list of chosen elements matching it.
    unlabeled : list
        Sorted distinct peaks that no chosen element matches.
    '''
    known_emission_lines = known_emission_lines.sort_values(by="wav_mars")
    peaks = sorted(peaks)

    # For every element, collect the indices (into the sorted peaks) of all
    # peaks lying near any one of that element's emission lines.
    elts_to_peak_indices = defaultdict(set)
    for _, row in known_emission_lines.iterrows():
        # NOTE(review): nearby_peaks presumably returns a half-open index
        # range [i_lo, i_hi) into the sorted peaks -- confirm against it.
        i_lo, i_hi = nearby_peaks(peaks, row["wav_mars"], delta=delta)
        # range() rather than the Python 2-only xrange(), so this runs on
        # both Python 2 and Python 3.
        elts_to_peak_indices[row["elt"]].update(range(i_lo, i_hi))

    # np.unique already returns distinct values; sorted() gives us a list
    # whose positions are the set indices handed to the set-cover routine.
    elts = sorted(np.unique(known_emission_lines["elt"]))

    # Sort each index set once so that the order of the output lists does
    # not depend on set iteration order.
    sets = [sorted(elts_to_peak_indices[elt]) for elt in elts]

    # best_cover holds positions into `sets` (and therefore into `elts`).
    best_cover = set_cover_approx_fast(sets)

    cover_elts_to_peaks = OrderedDict()
    cover_peaks_to_elts = OrderedDict()
    unlabeled = set(peaks)
    for i in best_cover:
        elt = elts[i]
        for peak_index in sets[i]:
            peak = peaks[peak_index]
            cover_elts_to_peaks.setdefault(elt, []).append(peak)
            cover_peaks_to_elts.setdefault(peak, []).append(elt)
            unlabeled.discard(peak)

    return cover_elts_to_peaks, cover_peaks_to_elts, sorted(unlabeled)
def test_set_cover_approx_fast():
    '''
    Check set_cover_approx_fast against fixed inputs and expected covers.

    Each case pairs a list of candidate sets with the exact list the
    routine is expected to return (positions into the input list, judging
    by the expected values -- confirm against set_cover_approx_fast).
    '''
    cases = (
        (example_sets, [4, 3, 2]),
        ([[30], [20, 30], [10]], [1, 2]),
    )
    for candidate_sets, expected_cover in cases:
        assert set_cover_approx_fast(candidate_sets) == expected_cover