예제 #1
0
def normalise_datadict(datadict, cut_to):
    """Balance a groundtruth dictionary to `cut_to` items per class.

    Items are grouped by their class label. A class holding at least
    `cut_to` items contributes a random sample of exactly `cut_to` of
    them to the new dictionary, and its left-over items are reported as
    removed. A class holding fewer than `cut_to` items is discarded
    entirely: its items appear in neither returned dictionary.

    Args:
        datadict: mapping of item -> class label.
        cut_to: number of items to keep for every retained class.

    Returns:
        A tuple `(newdatadict, removed)`, both mapping item -> class
        label. `newdatadict` holds the sampled items; `removed` holds
        the unsampled items of the retained classes.

    Raises:
        ValueError: propagated from `random.sample` when `cut_to` is
            negative and `datadict` is not empty.
    """
    dataset = collections.defaultdict(list)
    for r, cls in datadict.items():
        dataset[cls].append(r)

    newdataset = {}
    remaining = {}
    for cls, items in dataset.items():
        # Only classes with *fewer* than `cut_to` items are dropped; a class
        # with exactly `cut_to` items is kept whole.
        if len(items) < cut_to:
            continue
        sample = random.sample(items, cut_to)
        chosen = set(sample)
        newdataset.update((item, cls) for item in sample)
        # Items are dict keys of `datadict`, hence unique and hashable, so a
        # membership filter yields exactly the unsampled ones, in input order.
        remaining.update(
            (item, cls) for item in items if item not in chosen)
    return newdataset, remaining
def normalise_datadict(datadict, cut_to):
    """Balance a groundtruth dictionary to `cut_to` items per class.

    Items are grouped by their class label. A class holding at least
    `cut_to` items contributes a random sample of exactly `cut_to` of
    them to the new dictionary, and its left-over items are reported as
    removed. A class holding fewer than `cut_to` items is discarded
    entirely: its items appear in neither returned dictionary.

    Args:
        datadict: mapping of item -> class label.
        cut_to: number of items to keep for every retained class.

    Returns:
        A tuple `(newdatadict, removed)`, both mapping item -> class
        label. `newdatadict` holds the sampled items; `removed` holds
        the unsampled items of the retained classes.

    Raises:
        ValueError: propagated from `random.sample` when `cut_to` is
            negative and `datadict` is not empty.
    """
    dataset = collections.defaultdict(list)
    for r, cls in datadict.items():
        dataset[cls].append(r)

    newdataset = {}
    remaining = {}
    for cls, items in dataset.items():
        # Only classes with *fewer* than `cut_to` items are dropped; a class
        # with exactly `cut_to` items is kept whole.
        if len(items) < cut_to:
            continue
        sample = random.sample(items, cut_to)
        chosen = set(sample)
        newdataset.update((item, cls) for item in sample)
        # Items are dict keys of `datadict`, hence unique and hashable, so a
        # membership filter yields exactly the unsampled ones, in input order.
        remaining.update(
            (item, cls) for item in items if item not in chosen)
    return newdataset, remaining