Ejemplo n.º 1
0
def make_colorizer():
    """Build a ``Colorizer`` from freshly loaded matrices.

    Loads the spreading-activation matrix from ``csc.divisi2.examples``
    and the color matrix from ``make_color_matrix()``, logging each step,
    and returns ``Colorizer(spreading_activation_matrix, color_matrix)``.
    """
    # Imported lazily so that merely importing this module does not pull
    # in divisi2.
    from csc.divisi2 import examples as divisi_examples

    log.info('Loading ConceptNet matrix')
    concept_matrix = divisi_examples.spreading_activation()

    log.info('Loading color matrix')
    color_matrix = make_color_matrix()

    log.info('Building colorizer')
    colorizer = Colorizer(concept_matrix, color_matrix)
    return colorizer
Ejemplo n.º 2
0
def training_and_test_data():
    print "Constructing test/train from xkcd"
    train = defaultdict(list)
    test = defaultdict(list)
    combined = defaultdict(list)

    with open('grouped_color_data.txt') as inputlines:
        for line in inputlines:
            try:
                colorname, userid, r, g, b, monitor, colorblind, male = line.strip().split('|')
            except ValueError:
                continue
            rgblist = [float(r), float(g), float(b)]
            combined[colorname].append(rgblist)
            print colorname.decode('utf-8').encode('ascii', 'replace')
    
    #for colorname in combined.keys():
    #    if len(combined[colorname]) < 3:
    #        del combined[colorname]
    
    from csc.divisi2.examples import spreading_activation
    spread = spreading_activation()
    import random
    possible_concepts = set(spread.row_labels) & set(combined)
    test_concepts = random.sample(possible_concepts, 200)
    out = open('test_concepts.txt', 'w')
    for concept in test_concepts:
        print "Test concept:", concept
        print >> out, concept
    out.close()

    training = True
    for colorname in combined:
        concepts = en.nl.extract_concepts(colorname, check_conceptnet=True)
        censored = False
        for concept in concepts:
            if concept in test_concepts:
                censored = True
        if censored: target = test
        else: target = train
        target[colorname] = combined[colorname]
    return train, test
Ejemplo n.º 3
0
def run_leave_n_out():
    from csc.divisi2 import examples
    import colors
    log.info('Loading ConceptNet matrix')
    sa = examples.spreading_activation()
    log.info('Loading test input')
    train_input, test_input = training_and_test_data()
    log.info('Loading test data')
    test = make_test_data()
    log.info('Building colorizer')
    cmatrix = make_color_matrix()
    colorizer = Colorizer(sa, cmatrix)

    dist_dict = {}
    baseline_dict = {}
    distances = {
        'baseline': [],
        'weighted': [],
        'inter_annotator': [],
    }

    test_concepts = set(test.keys()) & set(sa.row_labels)
    for colorname in test_concepts:
        try:
            labout = tuple(colorizer.lab_color_for_concept(colorname)[:3])
        except TypeError:
            continue


        labact = test[colorname]
        rgbact = lab_to_rgb(labact)
        dist = euclid(labout,labact)
        
        distances['weighted'].append(dist)

        baseline = euclid([50, 0, 0], labact)
        distances['baseline'].append(baseline)
        
        #try:
        #    wnout = tuple(colorizer.lab_color_for_concept_wordnet(colorname)[:3])
        #    wndist = euclid(wnout, labact)
        #    distances['wordnet'].append(wndist)
        #    rgbwnout = lab_to_rgb(wnout)
        #    print colorname, '(wordnet)', rgbact, rgbwnout, str(wndist)
        #except TypeError:
        #    pass

        #try:
        #    prismdata = [rgb_to_lab([color.__r, color.__g, color.__b]) for color in colors.prism(colorname)]
        #    prism_avg = numpy.mean(numpy.array(prismdata), axis=0)
        #    prism_dist = euclid(prism_avg, labact)
        #    distances['nodebox_prism'].append(prism_dist)
        #except ZeroDivisionError:
        #    pass

        inter_annotator = euclid(rgb_to_lab(test_input[colorname][0]), 
                                 labact)
        if inter_annotator == 0:
            inter_annotator = euclid(rgb_to_lab(test_input[colorname][1]), 
                                     labact)
        distances['inter_annotator'].append(inter_annotator)
        
        rgbout = lab_to_rgb(labout)

        print colorname, rgbact, rgbout, str(dist)
    
    totals = {}
    for key, values in distances.items():
        totals[key] = sum(values)/len(values)
    totals['total'] = len(distances['weighted'])
    print totals
    return totals