def nl(self):
    """Return the natural-language toolkit for this object's language.

    See :mod:`csc.nl` for details on the tools the toolkit provides.
    """
    return get_nl(self.id)
def __init__(self):
    """Build the English NLP helpers and a 100-dimensional AnalogySpace."""
    self.en_nl = get_nl('en')
    self.normalizer = LemmatizedEuroNL('en')
    # SVD of the 2-D ConceptNet matrix gives the AnalogySpace projection.
    self.cnet = conceptnet_2d_from_db('en')
    self.analogyspace = self.cnet.svd(k=100)
def __init__(self):
    """Blend the affective WordNet matrix with ConceptNet and take its SVD."""
    self.helper = DivsiHelper()
    self.cnet_normalized = conceptnet_2d_from_db('en')
    # The affective WordNet matrix is loaded from a pickle cache on disk.
    self.affectwn_raw = get_picklecached_thing(
        'data/divsi/affectiveWNmatrix.pickle')
    self.affectWN = self.affectwn_raw.normalized()
    self.analogySpace = Blend([self.affectWN, self.cnet_normalized]).svd()
    self.EN_NL = get_nl('en')
def lookup_concept_from_nl(language, text):
    """Look up a concept from free natural-language text.

    The text is first normalized into a raw concept name, which requires
    the :mod:`simplenlp` module (or its ``standalone_nlp`` fallback).
    """
    try:
        from csc import nl
    except ImportError:
        # Fall back to the standalone copy of the NLP tools.
        import standalone_nlp as nl
    toolkit = nl.get_nl(language)
    return lookup_concept_raw(language, toolkit.normalize(text))
def make_standalone(lcode): nl = get_nl(lcode) with open('lang_%s.py' % lcode, 'w') as out_py: out_py.write('import sys, os\n') out_py.write('import cPickle as pickle\n') out_py.write('sys.path.insert(0, os.path.dirname(__file__))\n') # Pre-load objects nl.stopwords nl.lemmatizer nl.unlemmatizer nl.swapdict nl.autocorrect nl.blacklist nl.frequencies fake_picklestr = pickle.dumps(nl) fake_obj = local_unpickle.loads(fake_picklestr) picklestr = pickle.dumps(fake_obj) out_py.write('picklestr = """%s"""\n' % (picklestr)) out_py.write('%s_nl = nltools = pickle.loads(picklestr)\n' % (lcode))
# Build an AnalogySpace similarity model over the English ConceptNet
# matrix, then define a sample text about cheese for later analysis.
from csc.nl import get_nl
import itertools as it
import divisi2

en_nl = get_nl('en')
# 100-dimensional SVD of the fully normalized ConceptNet matrix.
A = divisi2.network.conceptnet_matrix('en')
concept_axes, axis_weights, feature_axes = A.normalize_all().svd(k=100)
# Concept-to-concept similarity; post_normalize scales rows to unit length.
sim = divisi2.reconstruct_similarity(concept_axes, axis_weights, post_normalize=True)
# Sample document (kept verbatim; it is split into sentences elsewhere).
cheese_text = "Cheese is a type of food. It is made from milk. There are many types of cheese. Many things affect the style, texture and flavor of a cheese. These include the origin of the milk, if the milk has been pasteurized, the amount of butterfat, bacteria and mold in the cheese, how the cheese is made and how old the cheese is. For some cheeses, the milk is curdled by adding acids such as vinegar or lemon juice. Most cheeses are acidified by bacteria. This bacteria turns milk sugars into lactic acid. Rennet is then used to finish the curdling. Vegetarian alternatives to rennet can also be used. Most of these are made by fermentation of a fungus called Mucor miehei. Other alternatives us species of the Cynara thistle family. People have been making cheese since before history was written down. It is not known when cheese was first made. It is known that cheese was eaten by the Sumerians in about 4000 BC. Cheese is usually made using milk. The milk of cows, goats, and sheep are most popular. Buffalo, camel and even mare's milk can also be used. Cheese makers usually cook the milk in large pots. They add salt and a substance from the stomach of young cows called rennet. This curdles the cheese and makes it solid. Some makers do not add rennet. They curdle the cheese in other ways. Cheese made in factories is often curdled by using bacteria. Other ingredients are added and the cheese is usually aged for a short time."
# Rough sentence segmentation: split the sample text on periods.
cheese_text_list = cheese_text.split('.')

def extract_concepts(sentence):
    # Single-word concepts only, restricted to those present in ConceptNet.
    return en_nl.extract_concepts(sentence, max_words=1, check_conceptnet=True)

def find_sim_words(word1, word2):
    # AnalogySpace similarity between two named concepts.  When either
    # word is missing from the matrix, the KeyError is reported and the
    # function implicitly returns None.
    try:
        similarity = sim.entry_named(word1, word2)
        return similarity
    except KeyError, err:
        print "Key not found: {0}".format(str(err))

def sentence_sim(concepts):
    # NOTE(review): this definition is truncated in this view of the file;
    # the body below ends mid-statement and is preserved as-is.
    pairs = list(it.product(*concepts))
    similarity = 0
    for pair in pairs:
        try:
# Parse Inform 6 source with pyparsing and harvest ConceptNet-style
# (HasProperty, concept) assertions from object description strings.
from pyparsing import *
from collections import defaultdict
from csc import divisi2
from csc.nl import get_nl
from locations import Inform6Parser
from verb_reader import verb_reader

english = get_nl('en')

def parseText(thingy):
    # thingy appears to be a parse result shaped (_, ident, string) --
    # TODO confirm against the grammar that produces it.
    _, ident, string = thingy
    concepts = english.extract_concepts(string, max_words=2, check_conceptnet=True)
    for concept in concepts:
        # CAH - This is quite possibly highly sketch. If this doesn't work, we'll have to spectral.
        descriptions[ident].append(('HasProperty', concept, True))
    return []

def join_words(lst):
    # Re-assemble a token list into a single space-separated string.
    return ' '.join(lst)

# Module-level accumulators filled in during parsing.
currentID = ""
idsToNames = defaultdict(list)
descriptions = defaultdict(list)

# keywords
K_WITH = Literal("with")
K_HAS = Literal("has")
K_CLASS = Literal("class")

# top-level definition keywords
D_OBJECT = Literal("Object")
from csc.corpus.models import Frequency
from csc.corpus.parse.models import FunctionWord
from csc.nl.mblem import get_mblem
from csc.nl import get_nl
# NOTE(review): side-effecting call in the middle of the import block --
# presumably warms the English NL toolkit at import time; verify intent.
get_nl('en')
from csc.lib.events.models import Event
from csc.lib.voting.models import Vote
from csc.django_settings import INSTALLED_APPS
from csc.pseudo_auth.models import LegacyUser
#from csc.conceptnet.admin import FrequencyAdmin
#from csc.conceptnet.analogyspace import make_category
from csc.webapi.handlers import LanguageHandler
def nl(self):
    """Return the natural-language toolkit for this Dataset's language.

    Raises:
        ValueError: if the Dataset has no associated language.
    """
    if self.language is not None:
        return get_nl(self.language)
    raise ValueError("This Dataset is not associated with a natural language")
def doctest_globals():
    """Return the namespace of objects made available to doctests."""
    return {'en_nl': get_nl('en')}
def nl(self):
    """The NL toolkit for this Dataset's language.

    :raises ValueError: when the Dataset has no associated language.
    """
    language = self.language
    if language is None:
        raise ValueError(
            "This Dataset is not associated with a natural language")
    return get_nl(language)
def __init__(self):
    """Attach the English natural-language toolkit as ``EuroNL``."""
    self.EuroNL = get_nl('en')