Ejemplo n.º 1
0
    def exists(cls, text, language, is_raw=False):
        """
        Report whether ConceptNet contains a concept for `text`.

        `language` may be a Language object or any value accepted by
        `Language.get`. When `is_raw` is True, `text` is taken to be in the
        raw (normalized) concept form already and is checked directly with
        `exists_raw`. Otherwise `text` is first looked up as a surface form;
        if none is found, it is normalized and then checked with
        `exists_raw`.
        """
        if isinstance(language, Language):
            lang = language
        else:
            lang = Language.get(language)

        if is_raw:
            return cls.exists_raw(text, lang)

        # A recorded surface form is enough to answer yes without normalizing.
        if SurfaceForm.get(text, lang, False) is not None:
            return True

        normalized = lang.nl.normalize(text)
        return cls.exists_raw(normalized, lang)
Ejemplo n.º 2
0
    def exists(cls, text, language, is_raw=False):
        """
        Report whether ConceptNet contains a concept for `text`.

        `language` may be a Language object or any value accepted by
        `Language.get`. When `is_raw` is True, `text` is taken to be in the
        raw (normalized) concept form already and is checked directly with
        `exists_raw`. Otherwise `text` is first looked up as a surface form;
        if none is found, it is normalized and then checked with
        `exists_raw`.
        """
        if isinstance(language, Language):
            lang = language
        else:
            lang = Language.get(language)

        if is_raw:
            return cls.exists_raw(text, lang)

        # A recorded surface form is enough to answer yes without normalizing.
        if SurfaceForm.get(text, lang, False) is not None:
            return True

        normalized = lang.nl.normalize(text)
        return cls.exists_raw(normalized, lang)
Ejemplo n.º 3
0
    def get(cls, text, language, auto_create=False):
        """
        Look up the Concept that a string of natural-language text refers to.

        `language` may be a Language object or any value accepted by
        `Language.get`. The text is first resolved through
        :meth:`SurfaceForm.get`, to which ``auto_create`` is forwarded; if a
        surface form is found (or created), its concept is returned. If not,
        the text is normalized and looked up with :meth:`get_raw`.

        Pass un-normalized text here. A string that has already been
        normalized, or that came from :attr:`Concept.text`, should go to
        :meth:`get_raw` instead.
        """
        lang = language
        if not isinstance(lang, Language):
            lang = Language.get(lang)

        surface = SurfaceForm.get(text, lang, auto_create)
        if surface is not None:
            return surface.concept

        # No surface form is recorded for this text: fall back to a direct
        # lookup of its normalized form.
        normalized = lang.nl.normalize(text)
        return Concept.get_raw(normalized, lang)
Ejemplo n.º 4
0
    def get(cls, text, language, auto_create=False):
        """
        Look up the Concept that a string of natural-language text refers to.

        `language` may be a Language object or any value accepted by
        `Language.get`. The text is first resolved through
        :meth:`SurfaceForm.get`, to which ``auto_create`` is forwarded; if a
        surface form is found (or created), its concept is returned. If not,
        the text is normalized and looked up with :meth:`get_raw`.

        Pass un-normalized text here. A string that has already been
        normalized, or that came from :attr:`Concept.text`, should go to
        :meth:`get_raw` instead.
        """
        lang = language
        if not isinstance(lang, Language):
            lang = Language.get(lang)

        surface = SurfaceForm.get(text, lang, auto_create)
        if surface is not None:
            return surface.concept

        # No surface form is recorded for this text: fall back to a direct
        # lookup of its normalized form.
        normalized = lang.nl.normalize(text)
        return Concept.get_raw(normalized, lang)
Ejemplo n.º 5
0
    def get(text, lang, auto_create=False):
        """
        Fetch the SurfaceForm for `text` in `lang`, optionally creating it.

        `lang` may be a Language object or a language-code string, which is
        resolved with `Language.get`. If no matching SurfaceForm exists, None
        is returned unless `auto_create` is true. In that case the text is
        split into a lemma and a residue with `lang.nl.lemma_factor`, the
        Concept for that lemma is fetched or created, and a SurfaceForm
        linking the text to that Concept is fetched or created and returned.
        """
        if isinstance(lang, basestring):
            lang = Language.get(lang)
        tools = lang.nl
        try:
            return SurfaceForm.objects.get(language=lang, text=text)
        except SurfaceForm.DoesNotExist:
            if not auto_create:
                return None

            lemma, residue = tools.lemma_factor(text)
            concept, is_new = Concept.objects.get_or_create(
                language=lang, text=lemma)
            if is_new:
                concept.save()

            # get_or_create (not a bare create) so this step is atomic
            form, _ = SurfaceForm.objects.get_or_create(
                concept=concept, text=text, residue=residue, language=lang)
            return form
Ejemplo n.º 6
0
    def get(text, lang, auto_create=False):
        """
        Fetch the SurfaceForm for `text` in `lang`, optionally creating it.

        `lang` may be a Language object or a language-code string, which is
        resolved with `Language.get`. If no matching SurfaceForm exists, None
        is returned unless `auto_create` is true. In that case the text is
        split into a lemma and a residue with `lang.nl.lemma_factor`, the
        Concept for that lemma is fetched or created, and a SurfaceForm
        linking the text to that Concept is fetched or created and returned.
        """
        if isinstance(lang, basestring):
            lang = Language.get(lang)
        tools = lang.nl
        try:
            return SurfaceForm.objects.get(language=lang, text=text)
        except SurfaceForm.DoesNotExist:
            if not auto_create:
                return None

            lemma, residue = tools.lemma_factor(text)
            concept, is_new = Concept.objects.get_or_create(
                language=lang, text=lemma)
            if is_new:
                concept.save()

            # get_or_create (not a bare create) so this step is atomic
            form, _ = SurfaceForm.objects.get_or_create(
                concept=concept, text=text, residue=residue, language=lang)
            return form
Ejemplo n.º 7
0
 def read(self, request, lang):
     """
     Return the id and sentence count of the language identified by `lang`.

     The result is a dict with the keys 'id' and 'sentence_count'. If
     `Language.DoesNotExist` is raised, `rc.NOT_FOUND` is returned instead.
     """
     try:
         language = Language.get(lang)
         summary = {
             'id': language.id,
             'sentence_count': language.sentence_count,
         }
     except Language.DoesNotExist:
         return rc.NOT_FOUND
     return summary
Ejemplo n.º 8
0
__version__ = "4.0rc2"
from django.db import models
from django.db.models import Q
from conceptnet.corpus.models import Language, Sentence, User, ScoredModel, Frequency
from events.models import Event, Activity
from voting.models import Vote, SCORES
from django.contrib.contenttypes import generic
from csc_utils.cache import cached
from datetime import datetime
from urllib import quote as urlquote
import re

# Default language for this module; `en` is bound to the same Language
# instance. NOTE(review): the instance is constructed directly rather than
# fetched with Language.get -- presumably to avoid a database query at import
# time; confirm.
DEFAULT_LANGUAGE = en = Language(id='en', name='English')


class TimestampedModel(models.Model):
    """
    Abstract base model that records creation and last-save times.

    `created` defaults to `datetime.now()`; `updated` is overwritten with
    the current time each time `save()` is called.
    """
    created = models.DateTimeField(default=datetime.now)
    updated = models.DateTimeField()

    def save(self, *args, **kwargs):
        """
        Stamp `updated` with the current time, then save as usual.

        Positional as well as keyword arguments are forwarded to the parent
        `save`, so callers that pass Django's `save` parameters positionally
        (e.g. `force_insert`) keep working.
        """
        self.updated = datetime.now()
        super(TimestampedModel, self).save(*args, **kwargs)

    class Meta:
        abstract = True


class UserData(TimestampedModel):
    """
    Timestamped record that references a User and an Activity.

    The `created` and `updated` fields are inherited from TimestampedModel.
    """
    user = models.ForeignKey(User)
    activity = models.ForeignKey(Activity)
Ejemplo n.º 9
0
 def read(self, request, lang):
     """
     Return the id and sentence count of the language identified by `lang`.

     The result is a dict with the keys 'id' and 'sentence_count'. If
     `Language.DoesNotExist` is raised, `rc.NOT_FOUND` is returned instead.
     """
     try:
         language = Language.get(lang)
         summary = {
             'id': language.id,
             'sentence_count': language.sentence_count,
         }
     except Language.DoesNotExist:
         return rc.NOT_FOUND
     return summary
Ejemplo n.º 10
0
                # Raise again
                raise e

    # Process sentences
    page_range = [p for p in paginator.page_range if p >= start_page]
    for i in page_range:
        sentences = paginator.page(i).object_list
        
        # Update progress
        batch.status = "process_sentence_batch " + str(i) + "/" + str(paginator.num_pages)
        batch.progress_num = i
        batch.progress_den = paginator.num_pages
        batch.save()

        try: do_batch(sentences)
        
        except Exception, e: #improbable exception for now
            batch.status = "process_sentence_batch " + str(i) + "/" + str(paginator.num_pages) + " ERROR!"
            batch.remarks = str(e.sentence) + "\n" + str(e) + "\n" + e.tb
            print "***TRACEBACK***"
            print batch.remarks
            batch.save()
            raise e


if __name__ == '__main__':
    # Script entry point: look up the user and the English Language object,
    # then call run() with start_page=50000.
    # NOTE(review): the username literal appears to be a masked placeholder.
    user = User.objects.get(username='******')
    lang = Language.get('en')
    run(user, lang, start_page=50000)

Ejemplo n.º 11
0
    # Process sentences
    page_range = [p for p in paginator.page_range if p >= start_page]
    for i in page_range:
        sentences = paginator.page(i).object_list

        # Update progress
        batch.status = "process_sentence_batch " + str(i) + "/" + str(
            paginator.num_pages)
        batch.progress_num = i
        batch.progress_den = paginator.num_pages
        batch.save()

        try:
            do_batch(sentences)

        except Exception, e:  #improbable exception for now
            batch.status = "process_sentence_batch " + str(i) + "/" + str(
                paginator.num_pages) + " ERROR!"
            batch.remarks = str(e.sentence) + "\n" + str(e) + "\n" + e.tb
            print "***TRACEBACK***"
            print batch.remarks
            batch.save()
            raise e


if __name__ == '__main__':
    # Script entry point: look up the user and the English Language object,
    # then call run() with start_page=50000.
    # NOTE(review): the username literal appears to be a masked placeholder.
    user = User.objects.get(username='******')
    lang = Language.get('en')
    run(user, lang, start_page=50000)
Ejemplo n.º 12
0
import nltk
from collections import defaultdict
from nltk.cfg import Nonterminal
from divisi.util import get_picklecached_thing
from conceptnet.corpus.models import Pattern, Sentence, Language
from simplenlp.euro import tokenize, untokenize
from nltk.corpus.reader import BracketParseCorpusReader
from nltk.corpus.util import LazyCorpusLoader
import string

# Corpus loader for the files under 'treebank/combined' whose names match the
# regex r'c.*\.mrg', read with BracketParseCorpusReader.
# NOTE(review): LazyCorpusLoader presumably defers loading the corpus until
# it is first used -- confirm against the NLTK version in use.
treebank_brown = LazyCorpusLoader(
    'treebank/combined', BracketParseCorpusReader, r'c.*\.mrg')
#treebank_brown = None

# English Language object, looked up once when this module is imported.
en = Language.get('en')

# Patterns are 3-tuples of:
# (relative probability, predtype, expression)

patterns = [
(1.0, 'HasFirstSubevent', 'the first thing you do when you {VP:1} is {VP:2}'),
(1.0, 'HasLastSubevent', 'the last thing you do when you {VP:1} is {VP:2}'),
(1.0, 'HasPrerequisite', 'something you need to do before you {VP:1} is {VP:2}'),
(1.0, 'MadeOf', '{NP:1} {BE} {ADVP:a} made of {NP:2}'),
(1.0, 'IsA', '{NP:1} {BE} a kind of {NP:2} {POST:0}'),
(1.0, 'IsA', '{NP:1} {BE} a sort of {NP:2} {POST:0}'),
(1.0, 'IsA', '{NP:1} {BE} a type of {NP:2} {POST:0}'),
(1.0, 'AtLocation', 'somewhere {NP:1} can be is {P} {NP:2}'),
(1.0, 'AtLocation', 'somewhere {NP:1} can be is {NP:2}'),
(1.0, 'AtLocation', 'you are likely to find {NP:1} {P} {NP:2}'),
(0.1, 'AtLocation', '{NP:1} can be {P} {NP:2}'),