Ejemplo n.º 1
0
    def nl(self):
        """
        Return the collection of natural language tools for this language.

        The tools are looked up with ``get_nl`` using this object's ``id``.
        See :mod:`csc.nl` for more information on using these tools.
        """
        language_id = self.id
        tools = get_nl(language_id)
        return tools
Ejemplo n.º 2
0
 def __init__(self):
     """
     Load the English resources this object keeps as attributes.

     Sets ``en_nl`` (from ``get_nl('en')``), ``normalizer`` (a
     ``LemmatizedEuroNL('en')``), ``cnet`` (from
     ``conceptnet_2d_from_db('en')``) and ``analogyspace`` (the
     ``k=100`` SVD of ``cnet``).
     """
     # Earlier approach, left disabled:
     #   self.tensor = get_picklecached_thing('tensor.gz')
     #   self.svd = self.tensor.svd(k=50)
     self.en_nl = get_nl('en')
     self.normalizer = LemmatizedEuroNL('en')
     concept_matrix = conceptnet_2d_from_db('en')
     self.cnet = concept_matrix
     self.analogyspace = concept_matrix.svd(k=100)
Ejemplo n.º 3
0
 def __init__(self):
     """
     Load the matrices and language tools this object keeps as attributes.

     Sets ``helper``, ``cnet_normalized``, ``affectwn_raw``, ``affectWN``
     (the normalized form of ``affectwn_raw``), ``analogySpace`` (the SVD
     of a ``Blend`` of ``affectWN`` and ``cnet_normalized``) and ``EN_NL``
     (from ``get_nl('en')``).
     """
     self.helper = DivsiHelper()
     self.cnet_normalized = conceptnet_2d_from_db('en')

     raw_affect = get_picklecached_thing('data/divsi/affectiveWNmatrix.pickle')
     self.affectwn_raw = raw_affect
     self.affectWN = raw_affect.normalized()

     blend_inputs = [self.affectWN, self.cnet_normalized]
     self.analogySpace = Blend(blend_inputs).svd()

     self.EN_NL = get_nl('en')
Ejemplo n.º 4
0
def lookup_concept_from_nl(language, text):
    """
    Find a concept from any natural language text that represents it.

    The text is first normalized into a raw concept name with the language's
    NL tools, then passed to ``lookup_concept_raw``.  The tools come from
    ``csc.nl`` when it can be imported, otherwise from the `standalone_nlp`
    module.
    """
    try:
        from csc import nl as nl_module
    except ImportError:
        import standalone_nlp as nl_module

    raw_name = nl_module.get_nl(language).normalize(text)
    return lookup_concept_raw(language, raw_name)
Ejemplo n.º 5
0
def make_standalone(lcode):
    """
    Write ``lang_<lcode>.py``, a module that rebuilds the NL tools for
    language ``lcode`` from a pickle embedded in its own source.

    The file is created (or overwritten) using a relative path, so it lands
    in the current working directory.  The generated module imports ``sys``,
    ``os`` and ``cPickle``, puts its own directory at the front of
    ``sys.path``, and binds the unpickled object to both ``<lcode>_nl`` and
    ``nltools``.

    :param lcode: language code passed to ``get_nl`` and used in the output
        file name and in the generated variable name.
    """
    nl = get_nl(lcode)
    with open('lang_%s.py' % lcode, 'w') as out_py:
        out_py.write('import sys, os\n')
        out_py.write('import cPickle as pickle\n')
        out_py.write('sys.path.insert(0, os.path.dirname(__file__))\n')

        # Pre-load objects: touch each attribute before pickling
        # (presumably they are loaded lazily -- confirm against csc.nl).
        for attr_name in ('stopwords', 'lemmatizer', 'unlemmatizer',
                          'swapdict', 'autocorrect', 'blacklist',
                          'frequencies'):
            getattr(nl, attr_name)

        # Round-trip through local_unpickle before the final dump.
        # NOTE(review): presumably this remaps classes to their standalone
        # equivalents -- confirm against local_unpickle.
        fake_picklestr = pickle.dumps(nl)
        fake_obj = local_unpickle.loads(fake_picklestr)
        picklestr = pickle.dumps(fake_obj)

        # Emit the pickle as a repr() literal.  Pasting it between bare
        # triple quotes let backslash escapes (or a '"""' sequence) inside
        # the pickle text be re-interpreted when the generated module is
        # parsed, which corrupts the payload.
        out_py.write('picklestr = %r\n' % (picklestr,))
        out_py.write('%s_nl = nltools = pickle.loads(picklestr)\n' % (lcode))
Ejemplo n.º 6
0
def make_standalone(lcode):
    """
    Write ``lang_<lcode>.py``, a module that rebuilds the NL tools for
    language ``lcode`` from a pickle embedded in its own source.

    The file is created (or overwritten) using a relative path, so it lands
    in the current working directory.  The generated module imports ``sys``,
    ``os`` and ``cPickle``, puts its own directory at the front of
    ``sys.path``, and binds the unpickled object to both ``<lcode>_nl`` and
    ``nltools``.

    :param lcode: language code passed to ``get_nl`` and used in the output
        file name and in the generated variable name.
    """
    nl = get_nl(lcode)
    with open('lang_%s.py' % lcode, 'w') as out_py:
        out_py.write('import sys, os\n')
        out_py.write('import cPickle as pickle\n')
        out_py.write('sys.path.insert(0, os.path.dirname(__file__))\n')

        # Pre-load objects: touch each attribute before pickling
        # (presumably they are loaded lazily -- confirm against csc.nl).
        for attr_name in ('stopwords', 'lemmatizer', 'unlemmatizer',
                          'swapdict', 'autocorrect', 'blacklist',
                          'frequencies'):
            getattr(nl, attr_name)

        # Round-trip through local_unpickle before the final dump.
        # NOTE(review): presumably this remaps classes to their standalone
        # equivalents -- confirm against local_unpickle.
        fake_picklestr = pickle.dumps(nl)
        fake_obj = local_unpickle.loads(fake_picklestr)
        picklestr = pickle.dumps(fake_obj)

        # Emit the pickle as a repr() literal.  Pasting it between bare
        # triple quotes let backslash escapes (or a '"""' sequence) inside
        # the pickle text be re-interpreted when the generated module is
        # parsed, which corrupts the payload.
        out_py.write('picklestr = %r\n' % (picklestr,))
        out_py.write('%s_nl = nltools = pickle.loads(picklestr)\n' % (lcode))
Ejemplo n.º 7
0
from csc.nl import get_nl
import itertools as it
import divisi2

# English natural-language tools; extract_concepts() delegates to them.
en_nl = get_nl('en')

# Build the 'en' ConceptNet matrix, normalize it, and take an SVD with k=100;
# the result is unpacked into three parts.
A = divisi2.network.conceptnet_matrix('en')
concept_axes, axis_weights, feature_axes = A.normalize_all().svd(k=100)
# Similarity structure queried by find_sim_words() through sim.entry_named().
sim = divisi2.reconstruct_similarity(concept_axes, axis_weights, post_normalize=True)

# Sample passage about cheese; it is split into pieces right below.
cheese_text = "Cheese is a type of food. It is made from milk. There are many types of cheese. Many things affect the style, texture and flavor of a cheese. These include the origin of the milk, if the milk has been pasteurized, the amount of butterfat, bacteria and mold in the cheese, how the cheese is made and how old the cheese is. For some cheeses, the milk is curdled by adding acids such as vinegar or lemon juice. Most cheeses are acidified by bacteria. This bacteria turns milk sugars into lactic acid. Rennet is then used to finish the curdling. Vegetarian alternatives to rennet can also be used. Most of these are made by fermentation of a fungus called Mucor miehei. Other alternatives us species of the Cynara thistle family. People have been making cheese since before history was written down. It is not known when cheese was first made. It is known that cheese was eaten by the Sumerians in about 4000 BC. Cheese is usually made using milk. The milk of cows, goats, and sheep are most popular. Buffalo, camel and even mare's milk can also be used. Cheese makers usually cook the milk in large pots. They add salt and a substance from the stomach of young cows called rennet. This curdles the cheese and makes it solid. Some makers do not add rennet. They curdle the cheese in other ways. Cheese made in factories is often curdled by using bacteria. Other ingredients are added and the cheese is usually aged for a short time."
# Split on periods.  Because the text ends with '.', the final element is an
# empty string, and every piece after the first keeps its leading space.
cheese_text_list = cheese_text.split('.')


def extract_concepts(sentence):
    """
    Return what ``en_nl.extract_concepts`` finds in *sentence*.

    The call is made with ``max_words=1`` and ``check_conceptnet=True``.
    """
    found = en_nl.extract_concepts(
        sentence,
        max_words=1,
        check_conceptnet=True,
    )
    return found


def find_sim_words(word1, word2):
    """
    Look up the similarity entry for a pair of words.

    Returns whatever ``sim.entry_named(word1, word2)`` yields.  If that
    lookup raises ``KeyError``, a "Key not found" message is printed and
    ``None`` is returned instead.
    """
    try:
        return sim.entry_named(word1, word2)
    except KeyError as err:
        # "except ... as" and the parenthesised print are valid on both
        # Python 2.6+ and Python 3; the comma/statement forms are a
        # SyntaxError on Python 3.
        print("Key not found: {0}".format(str(err)))
        return None


def sentence_sim(concepts):
    pairs = list(it.product(*concepts))
    similarity = 0
    for pair in pairs:
        try:
Ejemplo n.º 8
0
from pyparsing import *
from collections import defaultdict
from csc import divisi2
from csc.nl import get_nl
from locations import Inform6Parser
from verb_reader import verb_reader

# English natural-language tools, used by parseText to extract concepts.
english = get_nl('en')

def parseText(thingy):
    """
    Record the concepts found in a piece of text as properties of its owner.

    *thingy* must unpack into exactly three items; the first is ignored, the
    second is the identifier and the third is the text.  Each concept that
    ``english.extract_concepts`` finds (``max_words=2``,
    ``check_conceptnet=True``) is appended to ``descriptions[identifier]``
    as ``('HasProperty', concept, True)``.  Always returns an empty list.
    """
    _ignored, owner_id, text = thingy
    found = english.extract_concepts(text, max_words=2, check_conceptnet=True)
    for found_concept in found:
        # CAH - This is quite possibly highly sketch.  If this doesn't work,
        # we'll have to spectral.
        assertion = ('HasProperty', found_concept, True)
        descriptions[owner_id].append(assertion)
    return []
    
def join_words(lst):
    """Join the strings in *lst* into one string, separated by single spaces."""
    separator = ' '
    joined = separator.join(lst)
    return joined

# Module-level state.
# currentID starts out as the empty string.
currentID = ""
# Mapping whose missing keys default to an empty list.
idsToNames = defaultdict(list)
# Identifier -> list of ('HasProperty', concept, True) tuples; parseText
# appends to it.
descriptions = defaultdict(list)

# keywords
K_WITH = Literal("with")
K_HAS = Literal("has")
K_CLASS = Literal("class")

# top-level definition keywords
D_OBJECT = Literal("Object")
Ejemplo n.º 9
0
from csc.corpus.models import Frequency
from csc.corpus.parse.models import FunctionWord
from csc.nl.mblem import get_mblem

from csc.nl import get_nl
# The return value is discarded; presumably this call is made only to load
# the English NL tools ahead of time -- TODO confirm.
get_nl('en')

from csc.lib.events.models import Event
from csc.lib.voting.models import Vote
from csc.django_settings import INSTALLED_APPS
from csc.pseudo_auth.models import LegacyUser
#from csc.conceptnet.admin import FrequencyAdmin
#from csc.conceptnet.analogyspace import make_category
from csc.webapi.handlers import LanguageHandler
Ejemplo n.º 10
0
 def nl(self):
     """
     Return the natural language tools for this object's ``language``.

     Raises ``ValueError`` when ``language`` is ``None``.
     """
     if self.language is not None:
         return get_nl(self.language)
     raise ValueError("This Dataset is not associated with a natural language")
Ejemplo n.º 11
0
def doctest_globals():
    """
    Return the names made available to doctests.

    The result is a dict with the single key ``'en_nl'``, bound to
    ``get_nl('en')``.
    """
    return {'en_nl': get_nl('en')}
Ejemplo n.º 12
0
 def nl(self):
     """
     Return the natural language tools for this object's ``language``.

     Raises ``ValueError`` when ``language`` is ``None``.
     """
     if self.language is not None:
         return get_nl(self.language)
     message = "This Dataset is not associated with a natural language"
     raise ValueError(message)
Ejemplo n.º 13
0
 def __init__(self):
     """Store the result of ``get_nl('en')`` on ``self.EuroNL``."""
     english_tools = get_nl('en')
     self.EuroNL = english_tools