Beispiel #1
0
def symplify(sym, negate):
    """
    Wrap literal production symbol names in ProductionSymbol objects.

    `sym` may be a single name or a list/tuple of names; `negate` is
    passed unchanged to every ProductionSymbol that gets created.
    The result is always a list, even for a single name.
    """

    from generator import ProductionSymbol

    # Treat a lone name as a one-element sequence so both input shapes
    # share the same construction path.
    names = sym if isinstance(sym, (list, tuple)) else (sym,)
    return [ProductionSymbol(name, negate) for name in names]
Beispiel #2
0
def configure():
    """
    Build the vocabulary, grammar and classifier generator for the parser.

    The supported root words are declared together with their WordNet
    lemma keys and synonyms, and the grammar rules that use them are
    declared alongside (this keeps configuration in one place). You can
    invoke the program with "-l WORD" to get a list of
    'lemma keys' -> 'definitions' for a given word.

    Returns a 3-tuple (vocab, grammar, gen):
      vocab   -- the list filled in by the add_word() calls
      grammar -- the entries of the rule list joined with newlines; this
                 includes one generated rule per production that maps
                 the production to its quoted base words
      gen     -- the ClassifierGenerator carrying the symbol -> classifier
                 mappings
    """

    from generator import ClassifierGenerator

    # Supported vocabulary
    vocab = []

    # Supported grammar
    gramm = []

    # Classifier generator -- rules for generating classifiers
    gen = ClassifierGenerator()

    ################################################################################
    # Basic grammar and vocabulary
    ################################################################################

    # Be careful when choosing lemma keys: a very generic sense such as
    # "person" covers far too many unrelated concepts. If a word is TOO
    # generic, give it a lemma of None.

    # Sentences
    add_rule(gramm, 'S -> PART | S CONJ S | PART S | NOT S')
    add_rule(gramm, 'PART -> SEG | NEITHER SEG NOR SEG | BOTH SEG AND SEG')
    add_rule(gramm, 'SEG -> DESC | LPAREN S RPAREN')
    add_rule(gramm, 'DESC -> NON COREWORD | COREWORD | NON DROPWORD | DROPWORD')
    add_word(vocab, '(',       'LPAREN',     None,      (r'\(', r'\[', r'\{'))
    add_word(vocab, ')',       'RPAREN',     None,      (r'\)', r'\]', r'\}'))

    # Negation and conjunction
    add_rule(gramm, 'CONJ -> AND | OR')
    add_word(vocab, 'and',     'AND',        None,      ('but', 'with', 'holding', 'wearing', 'having', 'has', 'had', 'containing', 'has', 'and and'))
    add_word(vocab, 'not',     'NOT',        None,      ('no', 'without', 'lacking', 'missing', 'anti', "n't"))
    add_word(vocab, 'or',      'OR')
    add_word(vocab, 'non',     'NON')
    add_word(vocab, 'both',    'BOTH')
    add_word(vocab, 'neither', 'NEITHER')
    add_word(vocab, 'nor',     'NOR')

    # Drop words (words that we just ignore)
    add_rule(gramm, 'DROPWORD -> DET | PRONOUN | ADV | PREP | DVERB | ANON_PERSON | ANON_FACE | PAIR')
    add_word(vocab, 'person',  'ANON_PERSON',       None,      ('somebody', 'someone', 'people', 'human', 'anyone', 'anybody'))
    add_word(vocab, 'face',    'ANON_FACE',       ('face%1:08:00::', 'visage%1:08:00::'), ('face', 'visage'))
    add_word(vocab, 'a',       'DET',        None,      ('an', 'one', 'this', 'that', 'some', 'the', 'these', 'those', 'his',
                                                           'her', 'its', 'their'))
    add_word(vocab, 'of',      'PREP',       None,      ('in', 'on', 'from'))
    add_word(vocab, 'is',      'DVERB',      None,      ('is', 'was', 'be', 'been', 'being', 'are', "'s"))
    add_word(vocab, 'very',    'ADV',        None,      ('really', 'quite', 'understandably', 'noticibly', 'obviously',
                                                        'irritatingly', 'overtly', 'exactly', 'mostly', 'entirely', 'too'))
    add_word(vocab, 'pair',    'PAIR',       None,      ('set'))
    add_word(vocab, 'which',   'PRONOUN',    None,      ('he', 'she', 'who', 'it', 'they'))

    # Core words
    add_rule(gramm, 'COREWORD -> GENDER | RACE | AGE | ATTRACTIVE | COLOR | HAIR | PHOTO | ITEMS | FACE')

    # Gender
    add_rule(gramm, 'GENDER -> PROD_MALE | PROD_FEMALE | PROD_BOY | PROD_GIRL')
    add_word(vocab, 'male',   'PROD_MALE',   'man%1:18:00::',   ('man', 'men'))
    add_word(vocab, 'female', 'PROD_FEMALE', 'woman%1:18:00::', ('woman', 'women', 'chick', 'chicks'))
    add_word(vocab, 'girl',   'PROD_GIRL',   'girl%1:18:02::',  'girls')
    add_word(vocab, 'boy',    'PROD_BOY',    'boy%1:18:00::',   'boys')

    # Attractive
    add_rule(gramm, 'ATTRACTIVE -> PROD_ATTRACTIVE | PROD_UNATTRACTIVE')
    add_word(vocab, 'attractive',   'PROD_ATTRACTIVE',   ('attractive%3:00:01::', 'hot%5:00:00:sexy:00',
                                                          'sexy%3:00:00::', 'beautiful%3:00:00::'), None)
    add_word(vocab, 'unattractive', 'PROD_UNATTRACTIVE', ('unattractive%3:00:00::', 'ugly%3:00:00::'), 'fugly')

    # Colors
    add_rule(gramm, 'COLOR -> PROD_BLACK | PROD_WHITE | PROD_GRAY | PROD_BLONDE | PROD_BROWN')
    add_word(vocab, 'black',  'PROD_BLACK',  'black%1:07:00::')
    add_word(vocab, 'blond',  'PROD_BLONDE', 'blond%1:18:00::', ('light-haired'))
    add_word(vocab, 'brown',  'PROD_BROWN',  'brown%1:07:00::')
    add_word(vocab, 'gray',   'PROD_GRAY',   'gray%1:07:00::')
    add_word(vocab, 'white',  'PROD_WHITE',  'white%1:07:00::')

    # Race
    add_rule(gramm, 'RACE -> PROD_ASIAN | PROD_INDIAN | PROD_WHITE | PROD_BLACK')
    add_word(vocab, 'black',  'PROD_BLACK',  ('black%1:18:00::'))
    add_word(vocab, 'asian',  'PROD_ASIAN',  ('asian%1:18:00::', 'asia%1:14:00::'), 'asians')
    add_word(vocab, 'white',  'PROD_WHITE',  ('white%1:18:00::'))
    add_word(vocab, 'indian', 'PROD_INDIAN', ('indian%1:18:01::', 'india%1:15:00::'))

    # Hair
    add_rule(gramm, 'HAIR -> PROD_CURLY | PROD_STRAIGHT | PROD_WAVY | PROD_HAIR | PROD_BALDING | PROD_BALD')
    add_word(vocab, 'curly',    'PROD_CURLY')
    add_word(vocab, 'straight', 'PROD_STRAIGHT')
    add_word(vocab, 'wavy',     'PROD_WAVY')
    add_word(vocab, 'bald',     'PROD_BALD', ('bald%5:00:00:hairless:00', 'hairless%3:00:00::'))
    add_word(vocab, 'balding',  'PROD_BALDING', None, ('receding hairline', 'hairline receding'))
    add_word(vocab, 'hair',     'PROD_HAIR', None, 'haired')

    # Age
    add_rule(gramm, 'AGE -> PROD_OLD')
    add_word(vocab, 'old', 'PROD_OLD', 'old%3:00:02::')

    # Photo types
    add_rule(gramm, 'PHOTO -> IMAGE | PHOTOTYPE')
    add_rule(gramm, 'PHOTOTYPE -> PROD_POSED | PROD_NOT_POSED | PROD_COLOR | PROD_NOT_COLOR')
    add_word(vocab, 'image',   'IMAGE', ('image%1:06:00::', 'photo%1:06:00::', 'photograph%1:06:00::', 'picture%1:06:00::'),
                                        ('photo', 'photograph', 'picture'))
    add_word(vocab, 'posed',   'PROD_POSED', None, 'prepared')
    add_word(vocab, 'candid',  'PROD_NOT_POSED', None, ('surprised?'))
    add_word(vocab, 'color',   'PROD_COLOR', 'color%1:07:01::', ('colorful', 'vibrant'))
    add_word(vocab, 'b&w',     'PROD_NOT_COLOR', 'monochromatic%5:00:00:colored:00', ('colorless', 'black and white', 'monochromatic'))

    # Items (hats, etc)
    add_rule(gramm, 'ITEMS -> GLASSES | PROD_HAT')
    add_rule(gramm, 'GLASSES -> PROD_GLASSES | PROD_SUNGLASSES')
    add_word(vocab, 'glasses',    'PROD_GLASSES', 'glasses%1:06:00::')
    add_word(vocab, 'sunglasses', 'PROD_SUNGLASSES', 'sunglasses%1:06:00::')
    add_word(vocab, 'hat',        'PROD_HAT', 'hat%1:06:00::')

    # Facial features
    add_rule(gramm, 'FACE -> PROD_SMILING | PROD_FROWNING | PROD_DOUBLECHIN | PROD_CHUBBY' +
                   ' | PROD_NOT_CHUBBY | PROD_ANGRY | PROD_HIGH | PROD_CHEEKBONE | PROD_BIG' +
                   ' | PROD_NOSE | PROD_SMALL | PROD_OVAL ANON_FACE')
    add_word(vocab, 'smiling',     'PROD_SMILING',  ('smiling%1:10:00::', 'smile%2:29:00::', 'smile%2:32:00::'), None)
    add_word(vocab, 'frowning',    'PROD_FROWNING', ('frowning%5:00:00:displeased:00', 'frown%1:10:00::', 'frown%2:29:00::'), None)
    add_word(vocab, 'double-chin', 'PROD_DOUBLECHIN', None, ('double chin', 'two chins', 'a chin count of two', 'chins of two', 'two-chinned'))
    add_word(vocab, 'chubby',      'PROD_CHUBBY', None, ('husky', 'big[ -]boned'))
    add_word(vocab, 'skinny',      'PROD_NOT_CHUBBY')
    add_word(vocab, 'angry',       'PROD_ANGRY', 'angry%3:00:00::', ('upset', 'pissed off'))
    add_word(vocab, 'high',        'PROD_HIGH', ('high%3:00:01::', 'high%3:00:02::'))
    add_word(vocab, 'cheekbone',   'PROD_CHEEKBONE', ('cheekbone%1:08:00::', 'cheek%1:08:00::'), ('cheeks', 'cheekbones'))
    add_word(vocab, 'big',         'PROD_BIG', 'big%3:00:01::')
    add_word(vocab, 'small',       'PROD_SMALL', 'small%3:00:00::')
    add_word(vocab, 'nose',        'PROD_NOSE', 'nose%1:08:00::')
    add_word(vocab, 'oval',        'PROD_OVAL', 'oval%5:00:00:rounded:00')

    ################################################################################
    # Generator rules
    ################################################################################

    from generator import ClassifierResult as cr
    from generator import ProductionSymbol as ps
    from generator import UnsupportedSearch

    # Mapping a symbol to a callable instead of a fixed result.
    # When the PROD_MALE symbol is encountered, the lambda is called with two parameters:
    #   s: a tuple of ProductionSymbol objects (in this case, just PROD_MALE)
    #   d: desperate (boolean). This controls whether processing of the symbol may be
    #      deferred until later. If it is false, the parser is not desperate and it is
    #      up to the lambda to defer (by returning None). If it is true, the lambda has
    #      to produce a result now. Very useful for things like "attractive female".
    simple_map(gen, 'PROD_MALE', lambda s,d: cr('Male', -1 if s[0].negate() else 1) if d else None)
    simple_map(gen, 'PROD_FEMALE', lambda s,d: cr('Male', 1 if s[0].negate() else -1) if d else None)
    simple_map(gen, 'PROD_BLACK', lambda s,d: cr('Black', -1 if s[0].negate() else 1) if d else None)

    # "Attractive female" is a multi-symbol concept, so simple_map is not enough; the
    # symbol combinations are registered explicitly instead. Note that only the FEMALE
    # combinations (PROD_FEMALE, or negated PROD_MALE) are mapped here; no mapping is
    # registered for an attractive / unattractive MALE.
    attractive_female = cr('Attractive Woman', 1)
    unattractive_female = cr('Attractive Woman', -1)
    gen.add_mapping( (ps('PROD_MALE', True),     ps('PROD_ATTRACTIVE', False)),    (attractive_female) )
    gen.add_mapping( (ps('PROD_FEMALE', False),  ps('PROD_ATTRACTIVE', False)),    (attractive_female) )
    gen.add_mapping( (ps('PROD_MALE', True),     ps('PROD_UNATTRACTIVE', True)),   (attractive_female) )
    gen.add_mapping( (ps('PROD_FEMALE', False),  ps('PROD_UNATTRACTIVE', True)),   (attractive_female) )
    gen.add_mapping( (ps('PROD_MALE', True),     ps('PROD_ATTRACTIVE', True)),     (unattractive_female) )
    gen.add_mapping( (ps('PROD_FEMALE', False),  ps('PROD_ATTRACTIVE', True)),     (unattractive_female) )
    gen.add_mapping( (ps('PROD_MALE', True),     ps('PROD_UNATTRACTIVE', False)),  (unattractive_female) )
    gen.add_mapping( (ps('PROD_FEMALE', False),  ps('PROD_UNATTRACTIVE', False)),  (unattractive_female) )

    # The simple form: a symbol mapped straight to a classifier name. This automatically
    # takes care of the positive and negative cases. The values, by default, map to 1 and -1.
    simple_map(gen, 'PROD_SMILING', 'Smiling')
    simple_map(gen, 'PROD_ASIAN', 'Asian')
    simple_map(gen, 'PROD_DOUBLECHIN', 'Double Chin')
    simple_map(gen, 'PROD_ANGRY', ('Frowning', 'Arched Eyebrows'))
    simple_map(gen, 'PROD_HAT', 'Wearing Hat')
    simple_map(gen, ('PROD_HIGH', 'PROD_CHEEKBONE'), 'High Cheekbones')
    simple_map(gen, 'PROD_OVAL', 'Oval Face')
    simple_map(gen, 'PROD_OLD', 'Senior')

    # Also fairly simple, with the "NOT" words reversing the logic
    simple_map(gen, 'PROD_POSED', 'Posed Photo')
    simple_map(gen, 'PROD_NOT_POSED', 'Posed Photo', -1, 1)
    simple_map(gen, 'PROD_CHUBBY', 'Chubby')
    simple_map(gen, 'PROD_NOT_CHUBBY', 'Chubby', -1, 1)

    # Hair types
    gen.add_mapping( (ps('PROD_CURLY', False), ps('PROD_HAIR', False)), cr('Curly Hair', 1) )
    gen.add_mapping( (ps('PROD_CURLY', True),  ps('PROD_HAIR', True)),  cr('Curly Hair', -1) )
    gen.add_mapping( (ps('PROD_CURLY', True),  ps('PROD_HAIR', False)), cr('Curly Hair', -1) )
    gen.add_mapping( (ps('PROD_BLACK', False), ps('PROD_HAIR', False)), cr('Black Hair', 1) )
    gen.add_mapping( (ps('PROD_BLACK', True),  ps('PROD_HAIR', True)),  cr('Black Hair', -1) )
    gen.add_mapping( (ps('PROD_BLACK', True),  ps('PROD_HAIR', False)), cr('Black Hair', -1) )

    # Hair problems
    simple_map(gen, 'PROD_BALD', 'Bald')
    # NOTE(review): this mapping is registered for the negated PROD_HAIR symbol only.
    # If s[0].negate() reports that same flag, the lambda always yields Bald = -1 for
    # "no hair", which looks inverted -- confirm the intended polarity against
    # ClassifierGenerator.add_mapping before changing it.
    gen.add_mapping( ps('PROD_HAIR', True), lambda s,d: cr('Bald', -1 if s[0].negate() else 1) if d else None )

    # Facial features
    gen.add_mapping( (ps('PROD_BIG', False), ps('PROD_NOSE', False)), cr('Big Nose', 1) )
    gen.add_mapping( (ps('PROD_BIG', True), ps('PROD_NOSE', False)), cr('Big Nose', -1) )
    gen.add_mapping( (ps('PROD_BIG', True), ps('PROD_NOSE', True)), cr('Big Nose', -1) )
    gen.add_mapping( (ps('PROD_SMALL', False), ps('PROD_NOSE', False)), cr('Big Nose', -1) )
    # "not small" + "nose": the second symbol must be PROD_NOSE, mirroring the
    # PROD_BIG rows above (it was previously PROD_SMALL, a copy-paste slip).
    gen.add_mapping( (ps('PROD_SMALL', True), ps('PROD_NOSE', False)), cr('Big Nose', 1) )
    gen.add_mapping( (ps('PROD_SMALL', True), ps('PROD_NOSE', True)), cr('Big Nose', 1) )

    # Glasses: the negated forms of both symbols map to the same "No Eyewear" result
    eyeglasses = cr('Eyeglasses', 1)
    sunglasses = cr('Sunglasses', 1)
    no_glasses = cr('No Eyewear', 1)
    gen.add_mapping( ps('PROD_GLASSES',    False), eyeglasses ) # A search for "glasses"
    gen.add_mapping( ps('PROD_GLASSES',    True),  no_glasses ) # A search for "no glasses"
    gen.add_mapping( ps('PROD_SUNGLASSES', False), sunglasses ) # A search for "sunglasses"
    gen.add_mapping( ps('PROD_SUNGLASSES', True),  no_glasses ) # A search for "no sunglasses"

    # Returning multiple classifiers
    gen.add_mapping( ps('PROD_BOY', False), ( cr('Male', 1), cr('Child', 1) ) )
    gen.add_mapping( ps('PROD_GIRL', False), ( cr('Male', -1), cr('Child', 1) ) )

    # Disallowing certain searches. In this case, "not boy" is actually impossible
    # to search for, thanks to the way the classifiers are set up. Same with "not girl".
    gen.add_mapping( ps('PROD_BOY', True),  UnsupportedSearch('non-boy') )
    gen.add_mapping( ps('PROD_GIRL', True), UnsupportedSearch('non-girl') )

    ################################################################################
    # Grammar generation, do not touch
    ################################################################################

    # Group the base words of the vocabulary by their (normalised) production
    # name, skipping entries without a usable production and duplicate words.
    productions = {}
    for baseword in vocab:
        if baseword is None or baseword.production is None:
            continue
        prod = baseword.production.strip().upper()
        if not prod:
            continue
        # setdefault / "in" replace dict.has_key(), which no longer exists on Python 3
        words = productions.setdefault(prod, [])
        if baseword.word not in words:
            words.append(baseword.word)

    # Emit one terminal rule per production, in sorted order so the generated
    # grammar is deterministic: PROD -> "word1" | "word2" | ...
    # sorted() is used because dict.keys() has no .sort() on Python 3.
    for prod in sorted(productions):
        terminals = '" | "'.join(sorted(productions[prod]))
        add_rule(gramm, prod + ' -> "' + terminals + '"')

    grammar = '\n'.join(gramm)

    return (vocab, grammar, gen)