예제 #1
0
def gbootstrap(ginput, goutput, fnames):
    """Bootstrap a (WVectorizer, WunVectorizer) pair for channel `ginput`.

    Builds a list of training sequences either from the text files named in
    `fnames` or, when `fnames` is empty, from a copy of `goutput`'s alphabet
    (minus its stop symbol), then creates a WVectorizer and trains a
    WunVectorizer on the vectorized sequences.

    Parameters:
        ginput  -- input channel; the code uses getName(), getSize(),
                   getInternalSize(), input(), stop_symbol and, when the
                   channel name contains "SeqWord", getSyllables()
        goutput -- output channel; the code uses alphabet and stop_symbol
        fnames  -- list of text file names to load words from; [] selects
                   the bare-alphabet fallback

    Returns:
        (wvectorizer, wunvectorizer) -- the freshly built vectorizer and
        the unvectorizer trained against it.
    """
    
    print "Bootstrapping", ginput.getName(), "with",
    
    if fnames == []:
      print "basic elements"
    else:
      for fname in fnames:
        print "\""+fname+"\"",
      print ""
    
    # fixed parameters
    spectral_radius = 0.9
  
    # Sequences
    seqs = []
    
    # Collect training words from every given file.
    for fname in fnames:
        x = aux.load_words_from_text(fname, ginput)
        seqs.extend(x)

    if fnames == []:
        seqs = list(goutput.alphabet) # to avoid using a reference!
        seqs.remove(goutput.stop_symbol)
        
    #print seqs

    #print "words #", len(words)
    
    train_set = seqs
    #test_set  = ???
    
    to_train = []
    #to_test = []
    
    for seq in train_set:
        if "SeqWord" in ginput.getName():
          # Word-like channel: train on syllable prefixes, on the
          # individual syllables, and on the full word + stop symbol.
          syls = ginput.getSyllables(seq)
        
          # Proper prefixes of the syllable sequence, flattened back into a
          # character list.  Note j == 0 yields the empty list [].
          for j in range(len(syls)-1):
            to_train.append(list("".join(syls[0:j])))
            
          # NOTE(review): this extends with the syllable *strings* themselves,
          # so to_train mixes strings and lists -- confirm this is intended.
          to_train.extend(list(syls))
          
          to_train.append(list(seq) + [ginput.stop_symbol])
        else:
          # Atomic symbol: a two-element sequence [symbol, stop].
          to_train.append([seq , ginput.stop_symbol])
    

    wvectorizer   = WVectorizer.WVectorizer(ginput.getSize(), ginput.getInternalSize(), spectral_radius)
    
    #to_train = list(set(to_train))
    
    #print to_train
    
    # Drop empty entries produced above.  NOTE(review): list.remove() only
    # deletes the *first* occurrence, so any later duplicate [] entries
    # (one per word in the SeqWord branch) survive -- confirm intended.
    if "" in to_train:
        to_train.remove("")
        
    if [] in to_train:
        to_train.remove([])
    
    #print to_train
    # Pair each sequence with its vectorized input representation
    # (map returns a list under Python 2).
    to_train = map(lambda x: (x, wvectorizer.wvectorize(ginput.input(x))), to_train) 
    #to_test = map(lambda x: (x, wvectorizer.wvectorize(winput.input(x))), to_test) 
    #print to_train
    
    wunvectorizer = WunVectorizer.WunVectorizer(spectral_radius, goutput)
    wunvectorizer.train(to_train, [], True)
    #print ""

    return wvectorizer, wunvectorizer
예제 #2
0
def gbootstrap(ginput, goutput, fnames):

    print "Bootstrapping", ginput.getName(), "with",

    if fnames == []:
        print "basic elements"
    else:
        for fname in fnames:
            print "\"" + fname + "\"",
        print ""

    # fixed parameters
    spectral_radius = 0.9

    # Sequences
    seqs = []

    for fname in fnames:
        x = aux.load_words_from_text(fname, ginput)
        seqs.extend(x)

    if fnames == []:
        seqs = list(goutput.alphabet)  # to avoid using a reference!
        seqs.remove(goutput.stop_symbol)

    #print seqs

    #print "words #", len(words)

    train_set = seqs
    #test_set  = ???

    to_train = []
    #to_test = []

    for seq in train_set:
        if "SeqWord" in ginput.getName():
            syls = ginput.getSyllables(seq)

            for j in range(len(syls) - 1):
                to_train.append(list("".join(syls[0:j])))

            to_train.extend(list(syls))

            to_train.append(list(seq) + [ginput.stop_symbol])
        else:
            to_train.append([seq, ginput.stop_symbol])

    wvectorizer = WVectorizer.WVectorizer(ginput.getSize(),
                                          ginput.getInternalSize(),
                                          spectral_radius)

    #to_train = list(set(to_train))

    #print to_train

    if "" in to_train:
        to_train.remove("")

    if [] in to_train:
        to_train.remove([])

    #print to_train
    to_train = map(lambda x: (x, wvectorizer.wvectorize(ginput.input(x))),
                   to_train)
    #to_test = map(lambda x: (x, wvectorizer.wvectorize(winput.input(x))), to_test)
    #print to_train

    wunvectorizer = WunVectorizer.WunVectorizer(spectral_radius, goutput)
    wunvectorizer.train(to_train, [], True)
    #print ""

    return wvectorizer, wunvectorizer
예제 #3
0
    Copyright 2010, 2011, 2012 by neuromancer
"""

import src.io.text.English as English
import src.core.Mind as Mind
import src.Stats as Stats
from src.aux import load_words_from_text

# mind initialization

# English word input/output channels for the mind.
winput = English.InputWords()
woutput = English.OutputWords()

# NOTE(review): the third argument looks like a list of category-definition
# files -- confirm against Mind.Mind's constructor.
mind = Mind.Mind([winput], [woutput], ["data/pos/categories.txt"])

# Category word lists; presumably the ".longer" file holds the long-form
# category names and categories.txt the short tags -- TODO confirm.
cats_files = load_words_from_text("data/pos/categories.longer.txt", winput)
cats = load_words_from_text("data/pos/categories.txt", winput)

# Statistics collector bound to this mind.
stats = Stats.Stats(mind)

# mind training

for (cat, cat_file) in zip(cats, cats_files):

    print "Assimilating", cat_file + "s.."

    words = load_words_from_text("data/pos/" + cat_file + "s_train.txt",
                                 winput)
    cat_inputs = dict(SeqWordsEn=list(cat))

    for word in words:
예제 #4
0
    Copyright 2010, 2011, 2012 by neuromancer
"""

import src.io.text.English as English
import src.core.Mind as Mind
import src.Stats as Stats
from src.aux import load_words_from_text

# mind initialization

# English word input/output channels for the mind.
winput = English.InputWords()
woutput = English.OutputWords()

# NOTE(review): the third argument looks like a list of category-definition
# files -- confirm against Mind.Mind's constructor.
mind = Mind.Mind([winput], [woutput], ["data/pos/categories.txt"])

# Category word lists; presumably the ".longer" file holds the long-form
# category names and categories.txt the short tags -- TODO confirm.
cats_files = load_words_from_text("data/pos/categories.longer.txt", winput)
cats = load_words_from_text("data/pos/categories.txt", winput)

# Statistics collector bound to this mind.
stats = Stats.Stats(mind)

# mind training

for (cat,cat_file) in zip(cats,cats_files):
    
  print "Assimilating", cat_file+"s.."
  
  words = load_words_from_text("data/pos/"+cat_file+"s_train.txt", winput)
  cat_inputs = dict( SeqWordsEn = list(cat)) 

  for word in words:
     word_inputs = dict( SeqWordsEn = list(word))