Exemple #1
0
def fetch_trigrams():
    """Fetch and store tag trigrams derived from the stored bigrams.

    For each tag in ``TARGET_TAGS``, every stored ``Bigram`` whose ``tag1``
    equals that tag is used to build a request URL with ``make_url``. Each
    entry of the response's ``items`` list supplies a third tag name; the
    three tags are sorted and stored as a ``Trigram`` row whose ``count`` is
    set to the entry's ``count``. Responses that carry no ``items`` key are
    skipped.
    """
    for tag in TARGET_TAGS:

        # The selected rows are copied into a list up front rather than
        # iterating over the query directly, because iterating the query
        # directly was found to raise an exception.
        tag_pairs = [
            (bg.tag1, bg.tag2)
            for bg in Bigram.select().where(Bigram.tag1 == tag)
        ]

        for first, second in tag_pairs:
            resp = session.get(make_url([first, second]), params={
                'pagesize': 100,
                'site': 'stackoverflow',
            })
            resp_json = resp.json()
            if 'items' not in resp_json:
                continue

            for item in resp_json['items']:
                # Tags are stored in sorted order so the same three tags
                # always map to the same row regardless of discovery order.
                tag1, tag2, tag3 = sorted([first, second, item['name']])
                # Same lookup-or-insert helper that fetch_bigrams uses,
                # instead of a hand-rolled create/IntegrityError/get dance.
                tg, _ = Trigram.get_or_create(tag1=tag1, tag2=tag2, tag3=tag3)
                tg.count = item['count']
                tg.save()
Exemple #2
0
def fetch_bigrams():
    """Fetch and store tag bigrams for every tag in ``TARGET_TAGS``.

    For each target tag, a request is made to the URL built by ``make_url``.
    Each entry of the response's ``items`` list is stored as a ``Bigram``
    row (``tag1`` = the target tag, ``tag2`` = the entry's ``name``) whose
    ``count`` is set to the entry's ``count``. Responses that carry no
    ``items`` key are skipped.
    """
    for tag in TARGET_TAGS:
        resp = session.get(make_url([tag]), params={
            'pagesize': 100,
            'site': 'stackoverflow',
        })
        resp_json = resp.json()
        # A response without 'items' would otherwise raise KeyError and
        # abort the whole run; skip it, as fetch_trigrams does.
        if 'items' not in resp_json:
            continue

        for item in resp_json['items']:
            bg, _ = Bigram.get_or_create(tag1=tag, tag2=item['name'])
            bg.count = item['count']
            bg.save()
Exemple #3
0
        # convert variables to the right types
        xloc = float(xloc)
        yloc = float(yloc)
        loc = (xloc, yloc)
        parse = ParentedTree.parse(parse)
        modparse = ParentedTree.parse(modparse)

        # how many ancestors should the sampled landmark have?
        num_ancestors = count_lmk_phrases(modparse) - 1

        # sample `args.iterations` times for each sentence
        for _ in xrange(args.iterations):
            lmk, rel = get_meaning(loc, num_ancestors)

            if args.verbose:
                print "utterance:", repr(sentence)
                print "location: %s" % repr(loc)
                print "landmark: %s (%s)" % (lmk, lmk_id(lmk))
                print "relation: %s" % rel_type(rel)
                print "parse:"
                print parse.pprint()
                print "modparse:"
                print modparse.pprint()
                print "-" * 70

            location = Location(x=xloc, y=yloc)
            save_tree(modparse, location, rel, lmk)
            Bigram.make_bigrams(location.words)
            Trigram.make_trigrams(location.words)
            session.commit()
Exemple #4
0
            assert(not isinstance(rel, tuple))

            if args.verbose:
                print 'utterance:', repr(sentence)
                print 'location: %s' % repr(loc)
                print 'landmark: %s (%s)' % (lmk, lmk_id(lmk))
                print 'relation: %s' % rel_type(rel)
                print 'parse:'
                print parse.pprint()
                print 'modparse:'
                print modparse.pprint()
                print '-' * 70

            location = Location(x=xloc, y=yloc)
            save_tree(modparse, location, rel, lmk)
            Bigram.make_bigrams(location.words)
            Trigram.make_trigrams(location.words)

        if i % 200 == 0: session.commit()

    for sentence,(parse,modparse) in unique_sentences.items():
        SentenceParse.add_sentence_parse_blind(sentence, parse, modparse)

    session.commit()

    print 'counting ...'

    # count words
    w1 = aliased(Word)
    w2 = aliased(Word)
    parent = aliased(Production)