Beispiel #1
0
    def train(self, d, train_data, sr=1, iss=0.25, leak=0.25/5.0, seed=5, plot=False, fast=False, keep_internal_states=False, verbose=False):
        """Build the training stimuli and train a leaky reservoir with a linear read-out.

        The sentence stimuli are the inputs of the flow and the meaning stimuli
        are its teacher outputs.

        Args:
            d: dict of stimulus parameters; the keys read here are 'act_time',
                'offset', 'pause', 'initial_pause' and 'suppl_pause_at_the_end'.
            train_data: raw training data, handed to self.txt2corpus_and_meaning.
            sr: spectral radius given to the reservoir node.
            iss: input scaling given to the reservoir node.
            leak: leak rate given to the reservoir node.
            seed: seed applied to both the mdp and the numpy random generators;
                None leaves them unseeded.
            plot: accepted but not used by this method.
            fast: accepted but not used by this method.
            keep_internal_states: forwarded to self.teach_and_test_flow and
                returned unchanged.
            verbose: when true, print debugging information about the meaning
                stimuli after training.

        Returns:
            The tuple (l_construction_train, stim_full_data_train, flow,
            reservoir, keep_internal_states, l_m_elt, construction_words).
        """
        train_corpus, train_meaning = self.txt2corpus_and_meaning(train_txt=train_data)

        # Seed both generators before anything random (e.g. the reservoir) is built.
        if seed is not None:
            mdp.numx.random.seed(seed)
            np.random.seed(seed)

        # A construction (as in "construction grammar") is a sentence without
        # its open class words (nouns and verbs); those are replaced by 'X'.
        (l_construction_train, l_ocw_array_train, construction_words) = self.get_and_remove_ocw_in_corpus(
            corpus=train_corpus, _OCW='X', l_closed_class=self.closed_class_words)

        # Sentence stimuli are generated in one call so that every sentence
        # gets the same length.
        l_full_train = l_construction_train
        slice_train = slice(0, len(l_construction_train))

        (stim_full_data_train, l_full_offset_train) = CtIolangcod.generate_stim_input_nodic(
            l_data=l_full_train,
            act_time=d['act_time'], subset=None, l_input=construction_words,
            l_nr_word=None, mult=None, full_time=None,
            with_offset=d['offset'], pause=d['pause'], initial_pause=d['initial_pause'],
            suppl_pause_at_the_end=d['suppl_pause_at_the_end'], verbose=False)

        stim_sent_train = stim_full_data_train[slice_train]

        # Meaning stimuli (teacher signal), as long as the first sentence stimulus.
        l_m_elt = self.get_meaning_coding(imax_nr_ocw=self.imax_nr_ocw, imax_nr_actionrelation=self.imax_nr_actionrelation, elt_pred=self.l_elt_pred)

        (stim_mean_train, l_meaning_code_train) = self.generate_meaning_stim(
            l_data=train_meaning,
            l_ocw_array=l_ocw_array_train, full_time=stim_sent_train[0].shape[0],
            l_m_elt=l_m_elt, l_offset=l_full_offset_train[slice_train], verbose=False,
            initial_pause=d['initial_pause'], pause=d['pause'], act_time=d['act_time'])

        # Reservoir, read-out and the flow chaining them.
        reservoir = Oger.nodes.LeakyReservoirNode(output_dim=self.nbNeurons, spectral_radius=sr, input_scaling=iss, nonlin_func=np.tanh, leak_rate=leak)
        read_out = mdp.nodes.LinearRegressionNode(use_pinv=True, with_bias=True)
        flow = mdp.Flow([reservoir, read_out])

        # Train the flow; the training inputs are also used as the test inputs.
        # The returned activities are not needed here, only the trained flow is.
        (states_out_train, internal_states_train, internal_outputs_train, neuron_states_train) = \
            self.teach_and_test_flow(inputs_train_set=stim_sent_train, teacher_outputs_train_set=stim_mean_train, inputs_test_set=stim_sent_train, _flow=flow, _reservoir=reservoir, keep_internal_states=keep_internal_states)

        if verbose:
            # Single-argument print(...) gives the same output on Python 2 and 3.
            for i in range(len(stim_mean_train)):
                print("len(stim_mean_train) %s" % len(stim_mean_train))
                print("len(l_meaning_code_train) %s" % len(l_meaning_code_train))
                print(l_meaning_code_train[i])
                print(stim_mean_train[0] == stim_mean_train[i])
                print(l_meaning_code_train[0] == l_meaning_code_train[i])

        return l_construction_train, stim_full_data_train, flow, reservoir, keep_internal_states, l_m_elt, construction_words
Beispiel #2
0
    def test(self, d, test_corpus, shelf):
        """Run an already trained flow on a test corpus and decode the meanings.

        Args:
            d: dict of stimulus parameters; the keys read here are 'act_time',
                'offset', 'pause', 'initial_pause' and 'suppl_pause_at_the_end'.
            test_corpus: test sentences, handed to
                self.get_and_remove_ocw_in_corpus.
            shelf: mapping holding the training artefacts; the keys read here
                are "construction_words", "l_construction_train",
                "stim_full_data_train", "flow", "reservoir",
                "keep_internal_states" and "l_m_elt".

        Returns:
            The result of self.convert_l_output_activity_in_meaning applied to
            the read-out activity obtained for the test stimuli.

        Raises:
            Exception: if the construction words extracted from the test corpus
                differ from the ones stored in shelf.
        """
        (l_construction_test, l_ocw_array_test, construction_words_test) = self.get_and_remove_ocw_in_corpus(
            corpus=test_corpus, _OCW='X', l_closed_class=self.closed_class_words)

        # The input coding depends on the list of construction words, so it
        # must be the same list as the one used during training.
        if shelf["construction_words"] != construction_words_test:
            raise Exception("The construction words are not the same for the train constructions and the test constructions. So the coding of sentences will be different and should provoque a future problem.")

        l_full_const = l_construction_test

        (stim_full_data_test, l_full_offset_test) = CtIolangcod.generate_stim_input_nodic(
            l_data=l_full_const,
            act_time=d['act_time'], subset=None, l_input=shelf["construction_words"],
            l_nr_word=None, mult=None, full_time=None,
            with_offset=d['offset'], pause=d['pause'], initial_pause=d['initial_pause'],
            suppl_pause_at_the_end=d['suppl_pause_at_the_end'], verbose=False)

        # The test stimuli are appended to the training stimuli and then
        # selected again by position, right after the training constructions.
        nr_train = len(shelf["l_construction_train"])
        slice_test = slice(nr_train, nr_train + len(l_construction_test))

        stim_full_data = shelf["stim_full_data_train"] + stim_full_data_test
        stim_sent_test = stim_full_data[slice_test]

        (states_out_test, internal_states_test, internal_outputs_test, neuron_states_test) = \
            self.test_flow(inputs_test_set=stim_sent_test, _flow=shelf["flow"], _reservoir=shelf["reservoir"], keep_internal_states=shelf["keep_internal_states"])

        l_recovered_meaning_test = self.convert_l_output_activity_in_meaning(l_out_act=states_out_test, l_ocw_array=l_ocw_array_test, l_m_elt=shelf["l_m_elt"])
        return l_recovered_meaning_test
Beispiel #3
0
    def test(self, d, test_corpus, shelf):
        """Feed a test corpus to the trained flow stored in shelf and return the meanings.

        Args:
            d: dict of stimulus parameters; the keys read here are 'act_time',
                'offset', 'pause', 'initial_pause' and 'suppl_pause_at_the_end'.
            test_corpus: test sentences, passed to
                self.get_and_remove_ocw_in_corpus.
            shelf: mapping produced at training time; the keys read here are
                "construction_words", "l_construction_train",
                "stim_full_data_train", "flow", "reservoir",
                "keep_internal_states" and "l_m_elt".

        Returns:
            Whatever self.convert_l_output_activity_in_meaning returns for the
            output activity of the test stimuli.

        Raises:
            Exception: when the test corpus yields construction words that
                differ from shelf["construction_words"].
        """
        (l_construction_test, l_ocw_array_test,
         construction_words_test) = self.get_and_remove_ocw_in_corpus(
             corpus=test_corpus,
             _OCW='X',
             l_closed_class=self.closed_class_words)

        # Train and test sentences must be coded with the same word list.
        # The call form of raise is valid on both Python 2 and Python 3.
        if shelf["construction_words"] != construction_words_test:
            raise Exception(
                "The construction words are not the same for the train constructions and the test constructions. So the coding of sentences will be different and should provoque a future problem."
            )

        l_full_const = l_construction_test

        (stim_full_data_test,
         l_full_offset_test) = CtIolangcod.generate_stim_input_nodic(
             l_data=l_full_const,
             act_time=d['act_time'],
             subset=None,
             l_input=shelf["construction_words"],
             l_nr_word=None,
             mult=None,
             full_time=None,
             with_offset=d['offset'],
             pause=d['pause'],
             initial_pause=d['initial_pause'],
             suppl_pause_at_the_end=d['suppl_pause_at_the_end'],
             verbose=False)

        # Position of the test stimuli once they are appended to the training
        # stimuli: they start right after the training constructions.
        first_test_index = len(shelf["l_construction_train"])
        slice_test = slice(first_test_index,
                           first_test_index + len(l_construction_test))

        stim_full_data = shelf["stim_full_data_train"] + stim_full_data_test
        stim_sent_test = stim_full_data[slice_test]

        (states_out_test, internal_states_test, internal_outputs_test,
         neuron_states_test) = self.test_flow(
             inputs_test_set=stim_sent_test,
             _flow=shelf["flow"],
             _reservoir=shelf["reservoir"],
             keep_internal_states=shelf["keep_internal_states"])

        l_recovered_meaning_test = self.convert_l_output_activity_in_meaning(
            l_out_act=states_out_test,
            l_ocw_array=l_ocw_array_test,
            l_m_elt=shelf["l_m_elt"])
        return l_recovered_meaning_test
Beispiel #4
0
    def train(self, sent_form_info_train, train_meaning, train_corpus, d, sr=3, iss=0.1, leak=0.1, ridge=10**-1):
        """Build sentence and meaning stimuli from the training data and train a reservoir.

        Args:
            sent_form_info_train: structure information of the training
                sentences, used for the open-class-word arrays and for the
                meaning stimuli.
            train_meaning: training meanings, used for the open-class-word arrays.
            train_corpus: training sentences, turned into constructions.
            d: dict of stimulus parameters; the keys read here are 'act_time',
                'offset', 'pause', 'initial_pause' and 'suppl_pause_at_the_end'.
            sr, iss, leak: passed positionally, in that order, to
                reservoir.Reservoir after self.iNbNeurons.
            ridge: accepted but not used by this method.

        Returns:
            The tuple (l_ocw_array_train, states_out_train, construction_words,
            internal_states_train, res, stim_mean_train, stim_sent_train,
            l_m_elt).
        """
        import io_language_coding as CtIolangcod
        import time

        # The generators are seeded from the wall clock (whole seconds), so
        # two runs are not expected to give the same reservoir.
        clock_seed = int(round(time.time()))
        mdp.numx.random.seed(clock_seed)
        np.random.seed(clock_seed)

        # Constructions: sentences whose open class words (nouns and verbs)
        # have been replaced by the 'X' marker.
        l_construction_train, construction_words = self.get_and_remove_ocw_in_corpus(
            corpus=train_corpus, _OCW='X')
        l_ocw_array_train = self.generate_l_ocw_array(sent_form_info_train, train_meaning)

        # Sentence stimuli for every training construction.
        stim_options = dict(
            l_data=l_construction_train,
            act_time=d['act_time'],
            subset=None,
            l_input=construction_words,
            l_nr_word=None,
            mult=None,
            full_time=None,
            with_offset=d['offset'],
            pause=d['pause'],
            initial_pause=d['initial_pause'],
            suppl_pause_at_the_end=d['suppl_pause_at_the_end'],
            verbose=False,
        )
        (all_stimuli, all_offsets) = CtIolangcod.generate_stim_input_nodic(**stim_options)
        stim_sent_train = all_stimuli[slice(0, len(l_construction_train))]

        # Meaning stimuli, as long as the first sentence stimulus.
        l_m_elt = self.get_meaning_coding(
            max_nr_ocw=self.imax_nr_ocw,
            max_nr_actionrelation=self.imax_nr_actionrelation,
            elt_pred=self.l_elt_pred)
        stim_length = stim_sent_train[0].shape[0]
        (stim_mean_train, l_meaning_code_train) = self.generate_meaning_stim(
            l_structure=sent_form_info_train, full_time=stim_length, l_m_elt=l_m_elt)

        # Plain reservoir (no feedback), trained with the meaning stimuli as
        # first argument and the sentence stimuli as second argument.
        res = reservoir.Reservoir(self.iNbNeurons, sr, iss, leak)
        training_result = res.train(stim_mean_train, stim_sent_train)
        states_out_train, internal_states_train = training_result

        return (l_ocw_array_train, states_out_train, construction_words,
                internal_states_train, res, stim_mean_train, stim_sent_train,
                l_m_elt)
Beispiel #5
0
def main(path_file_in,
         path_file_out,
         N=800,
         sr=3,
         iss=0.1,
         leak=0.1,
         ridge=10**-1,
         plot=False,
         feedback=False,
         return_result=False,
         verbose=False):
    """Train a reservoir on the data of path_file_in and write the recovered test sentences.

    The training and test data are read with extract_data_io, turned into
    sentence and meaning stimuli, and a reservoir.Reservoir is trained with the
    meaning stimuli as first argument and the sentence stimuli as second
    argument. The read-out activity obtained on the test meanings is converted
    back into sentences, the first one is printed, and all of them are written
    to path_file_out, one per line.

    Args:
        path_file_in: path of the input data file.
        path_file_out: path of the file receiving the recovered test sentences.
        N, sr, iss, leak: passed positionally, in that order, to
            reservoir.Reservoir.
        ridge: accepted but not used by this function.
        plot: when true, plot the train and test output activities to files
            under "../Results/".
        feedback: when equal to False the reservoir is trained and tested
            directly; otherwise the thresholded and delayed read-out is
            appended to the reservoir input and training is repeated for a
            fixed number of epochs.
        return_result: when true, return the list of recovered test sentences.
        verbose: accepted but not used by this function.

    Returns:
        The list of recovered test sentences when return_result is true,
        otherwise None.
    """

    def write_list_in_file(l, file=None, file_path=None):
        """
        Write a list in a file with one item per line (like a one column csv).

        If file is given, it is assumed to be already open for writing and it
        is left open. If file_path is given, the file is opened for writing,
        the list is written, and the file is closed even if a write fails.
        Giving both, or neither, raises an Exception.
        """
        if file_path is not None:
            if file is not None:
                raise Exception(
                    "Too much arguments. You must choose between file and file_path."
                )
            file = open(file_path, 'w')
        if file is None:
            raise Exception("No file given in input.")

        try:
            for item in l:
                file.write("%s\n" % item)
        finally:
            # Only close a handle opened here; a caller-supplied one stays open.
            if file_path is not None:
                file.close()

    import io_language_coding as CtIolangcod
    # Hard-coded direction switch. Only the False branch is exercised.
    # NOTE(review): with True, test_meaning and stim_mean_test are never
    # defined, so the code below would fail.
    sentence_to_meaning = False

    # Parameters of the stimuli.
    d = {}
    d['act_time'] = 5
    d['pause'] = True
    d['suppl_pause_at_the_end'] = 1 * d['act_time']
    d['initial_pause'] = True
    d['offset'] = False

    # The random generators are seeded from the wall clock (whole seconds).
    import time
    seed = int(round(time.time()))
    mdp.numx.random.seed(seed)
    np.random.seed(seed)

    [train_data_txt, test_data_txt, sent_form_info_train,
     sent_form_info_test] = extract_data_io(path_file=path_file_in)

    train_corpus, train_meaning = txt2corpus_and_meaning(
        train_txt=train_data_txt)
    if sentence_to_meaning:
        test_corpus = test_data_txt
    else:
        test_meaning = test_data_txt

    # A construction (as in "construction grammar") is a sentence without its
    # open class words (nouns and verbs); those are replaced by 'X'.
    (l_construction_train,
     construction_words) = get_and_remove_ocw_in_corpus(corpus=train_corpus,
                                                        _OCW='X')
    l_ocw_array_train = generate_l_ocw_array(sent_form_info_train,
                                             train_meaning)
    l_ocw_array_test = generate_l_ocw_array(sent_form_info_test, test_meaning)

    if sentence_to_meaning:
        (l_construction_test,
         construction_words_test) = get_and_remove_ocw_in_corpus(
             corpus=test_corpus, _OCW='X')
        # Train and test sentences must be coded with the same word list.
        if construction_words != construction_words_test:
            raise Exception(
                "The construction words are not the same for the train constructions and the test constructions. So the coding of sentences will be different and should provoque a future problem."
            )

    # All sentence stimuli are generated in one call so that every sentence
    # gets the same length.
    if sentence_to_meaning:
        l_full_const = l_construction_train + l_construction_test
        slice_test = slice(
            len(l_construction_train),
            len(l_construction_train) + len(l_construction_test))
    else:
        l_full_const = l_construction_train
    slice_train = slice(0, len(l_construction_train))
    (stim_full_data, l_full_offset) = CtIolangcod.generate_stim_input_nodic(
        l_data=l_full_const,
        act_time=d['act_time'],
        subset=None,
        l_input=construction_words,
        l_nr_word=None,
        mult=None,
        full_time=None,
        with_offset=d['offset'],
        pause=d['pause'],
        initial_pause=d['initial_pause'],
        suppl_pause_at_the_end=d['suppl_pause_at_the_end'],
        verbose=False)
    stim_sent_train = stim_full_data[slice_train]
    if sentence_to_meaning:
        stim_sent_test = stim_full_data[slice_test]

    l_m_elt = get_meaning_coding()

    # Meaning stimuli, as long as the first training sentence stimulus.
    (stim_mean_train, l_meaning_code_train) = generate_meaning_stim(
        l_structure=sent_form_info_train,
        full_time=stim_sent_train[0].shape[0],
        l_m_elt=l_m_elt)

    if not sentence_to_meaning:
        (stim_mean_test, l_meaning_code_test) = generate_meaning_stim(
            l_structure=sent_form_info_test,
            full_time=stim_sent_train[0].shape[0],
            l_m_elt=l_m_elt)

    other_corpus_used = False

    # Reservoir and read-out definitions.
    res = reservoir.Reservoir(N, sr, iss, leak)

    # `== False` is kept on purpose: a value such as None selects the
    # feedback branch, exactly as before.
    if feedback == False:
        # Classic working of the reservoir: train, then test on the test set.
        states_out_train, internal_states_train = res.train(
            stim_mean_train, stim_sent_train)
        states_out_test, internal_states_test = res.test(stim_mean_test)
    else:
        # Feedback working of the reservoir.
        # !! Should be implemented directly in the reservoir class !!
        delay = 1
        nb_epoch_max = 4
        dim_input = stim_mean_train[0].shape[1]
        input_train = []

        # Initial training input: the meaning stimulus concatenated with the
        # sentence stimulus delayed by `delay` time steps (zero rows in front).
        for (x, y) in zip(np.copy(stim_mean_train), np.copy(stim_sent_train)):
            for time_step_delay in range(delay):
                y = np.concatenate(([[0.] * len(y[0])], y), axis=0)
            input_train.append(
                np.array(np.concatenate((x, y[:-delay]), axis=1)))

        # The feedback array gets its own name instead of overwriting the
        # boolean `feedback` parameter.
        fb_signal = None
        nb_train = 0
        while nb_train < nb_epoch_max:
            states_out_train, internal_states_train = res.train(
                input_train, stim_sent_train)

            for num_phrase in range(len(states_out_train)):
                # Thresholded output signal.
                states_out_train[num_phrase] = np.array([
                    treshold_signal(signal_t, 1.5, -0.5)
                    for signal_t in states_out_train[num_phrase]
                ])
                # NOTE(review): the feedback is only recomputed during the
                # first epoch; in later epochs every sentence reuses the value
                # computed for the last sentence of the first epoch. Confirm
                # this is intended.
                if nb_train == 0:
                    fb_signal = np.array(states_out_train[num_phrase])
                    # Delay the signal by prepending zero rows.
                    for time_step_delay in range(delay):
                        fb_signal = np.concatenate(
                            ([[0.] * len(fb_signal[0])], fb_signal), axis=0)

                # Replace the feedback part of the input (rows dim_input and
                # beyond once transposed) with the delayed feedback.
                input_train[num_phrase] = input_train[num_phrase].T
                input_train[num_phrase][dim_input:] = fb_signal[:-delay].T
                input_train[num_phrase] = input_train[num_phrase].T

            nb_train += 1

        # Test one time step at a time, feeding the previous output back in.
        for t in range(0, stim_mean_test[0].shape[0], 1):
            input_test = []
            if t == 0:  # to be revised
                # No previous output yet: the feedback part is all zeros.
                for n_phrase in range(len(stim_mean_test)):
                    input_test.append(
                        np.concatenate((stim_mean_test[n_phrase][t:t + 1, :],
                                        [[0.] * len(stim_sent_train[0][0])]),
                                       axis=1))

                states_out_test, internal_states_test = res.test(input_test)
                import copy
                states_out_test_def = copy.deepcopy(states_out_test)

            else:
                for n_phrase in range(len(stim_mean_test)):
                    fb_signal = np.array(states_out_test[n_phrase])
                    input_test.append(
                        np.concatenate(
                            (stim_mean_test[n_phrase][t:t + 1, :], fb_signal),
                            axis=1))

                states_out_test, internal_states_test = res.test(input_test)

                # Accumulate the outputs of every time step.
                for n_phrase in range(len(stim_mean_test)):
                    states_out_test_def[n_phrase] = np.concatenate(
                        (states_out_test_def[n_phrase],
                         states_out_test[n_phrase]),
                        axis=0)

        states_out_test = states_out_test_def

    # Writing the answer sentence.
    if other_corpus_used:
        var_inutile = 0

    else:

        l_recovered_construction_train = convert_l_output_activity_in_construction(
            l_out_act=states_out_train,
            construction_words=construction_words,
            min_nr_of_val_upper_thres=1)
        l_recovered_sentences_train = attribute_ocw_to_constructions(
            l_constructions=l_recovered_construction_train,
            l_ocw_array=l_ocw_array_train,
            _OCW='X')

        l_recovered_construction_test = convert_l_output_activity_in_construction(
            l_out_act=states_out_test,
            construction_words=construction_words,
            min_nr_of_val_upper_thres=2)
        l_recovered_sentences_test = attribute_ocw_to_constructions(
            l_constructions=l_recovered_construction_test,
            l_ocw_array=l_ocw_array_test,
            _OCW='X')

        # One space-separated string per recovered test sentence.
        l_final_sent_test = [
            " ".join(list_words) for list_words in l_recovered_sentences_test
        ]

        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("********************************************** ")
        print(" *** RECOGNIZED SENTENCES *** ")
        print(l_final_sent_test[0])

        write_list_in_file(l=l_final_sent_test, file_path=path_file_out)
        # NOTE(review): returning here skips the plotting below when both
        # return_result and plot are true - confirm this is intended.
        if return_result:
            return l_final_sent_test

    # Plot the output activities.
    if plot:
        import plotting as plotting

        plotting.plot_array_in_file(
            root_file_name="../Results/states_out_train",
            array_=states_out_train,
            titles_subset=l_construction_train,
            legend_=construction_words,
            plot_slice=None,
            title="",
            subtitle="")

        plotting.plot_array_in_file(
            root_file_name="../Results/states_out_test",
            array_=states_out_test,
            titles_subset=l_recovered_sentences_test,
            legend_=construction_words,
            plot_slice=None,
            title="",
            subtitle="")

    print("")
Beispiel #6
0
def main(path_file_in,
         path_file_out,
         plot=False,
         fast=False,
         keep_internal_states=False,
         verbose=False):
    """Train a sentence -> meaning reservoir flow and write the recovered test meanings.

    The training and test data are read with common.extract_data_io, turned
    into sentence stimuli (flow inputs) and meaning stimuli (teacher outputs),
    and a leaky reservoir followed by a linear regression read-out is trained.
    The read-out activity for the test sentences is converted into meanings,
    which are printed and written to path_file_out, one test sentence per line.

    Args:
        path_file_in: path of the input data file.
        path_file_out: path of the file receiving the recovered meanings.
        plot: when true, plot output and internal activities to files under
            "../RES_TEMP/".
        fast: must be false; the fast mode is not defined and raises.
        keep_internal_states: forwarded to the train/test helpers; when true,
            mdp.Flow is also made inspectable through Oger.
        verbose: when true, print debugging information.

    Returns:
        The list of recovered test meanings as strings: the meanings of one
        sentence are separated by ',' and the words of one meaning by ' '.
        The list is returned whether or not plotting is requested (it used to
        be returned only when plot was true, and None otherwise).

    Raises:
        Exception: if the train and test construction words differ, or if
            fast is true.
    """
    import os
    # Make the parent directory of this file importable.
    current_directory = os.path.dirname(os.path.abspath(__file__))
    parent_directory = os.path.dirname(current_directory)
    sys.path.append(parent_directory)
    import io_language_coding as CtIolangcod

    # Parameters of the stimuli.
    d = {}
    d['act_time'] = 5
    d['pause'] = True
    d['suppl_pause_at_the_end'] = 1 * d['act_time']
    d['initial_pause'] = False
    d['offset'] = True

    # Parameters of the reservoir.
    N = 500
    sr = 1
    iss = 0.25
    leak = 0.25 / float(d['act_time'])

    # Fixed seed for both random generators.
    seed = 5
    if seed is not None:
        mdp.numx.random.seed(seed)
        np.random.seed(seed)

    [train_data_txt, test_data_txt,
     sent_form_info_test] = common.extract_data_io(path_file=path_file_in)
    train_corpus, train_meaning = common.txt2corpus_and_meaning(
        train_txt=train_data_txt)

    test_corpus = test_data_txt

    # A construction (as in "construction grammar") is a sentence without its
    # open class words (nouns and verbs); those are replaced by 'X'.
    (l_construction_train, l_ocw_array_train,
     construction_words) = common.get_and_remove_ocw_in_corpus(
         corpus=train_corpus,
         _OCW='X',
         l_closed_class=get_closed_class_words())

    (l_construction_test, l_ocw_array_test,
     construction_words_test) = common.get_and_remove_ocw_in_corpus(
         corpus=test_corpus, _OCW='X', l_closed_class=get_closed_class_words())
    # Train and test sentences must be coded with the same word list.
    if construction_words != construction_words_test:
        raise Exception(
            "The construction words are not the same for the train constructions and the test constructions. So the coding of sentences will be different and should provoque a future problem."
        )

    # All sentence stimuli (train followed by test) are generated in one call
    # so that every sentence gets the same length.
    l_full_const = l_construction_train + l_construction_test
    slice_test = slice(len(l_construction_train),
                       len(l_construction_train) + len(l_construction_test))

    slice_train = slice(0, len(l_construction_train))
    (stim_full_data, l_full_offset) = CtIolangcod.generate_stim_input_nodic(
        l_data=l_full_const,
        act_time=d['act_time'],
        subset=None,
        l_input=construction_words,
        l_nr_word=None,
        mult=None,
        full_time=None,
        with_offset=d['offset'],
        pause=d['pause'],
        initial_pause=d['initial_pause'],
        suppl_pause_at_the_end=d['suppl_pause_at_the_end'],
        verbose=False)
    stim_sent_train = stim_full_data[slice_train]

    stim_sent_test = stim_full_data[slice_test]

    # Meaning stimuli (teacher signal). max_nr_ocw, max_nr_actionrelation and
    # elt_pred are not defined in this function; they are expected to exist at
    # module level.
    l_m_elt = common.get_meaning_coding(
        max_nr_ocw=max_nr_ocw,
        max_nr_actionrelation=max_nr_actionrelation,
        elt_pred=elt_pred)

    (stim_mean_train, l_meaning_code_train) = common.generate_meaning_stim(
        l_data=train_meaning,
        l_ocw_array=l_ocw_array_train,
        full_time=stim_sent_train[0].shape[0],
        l_m_elt=l_m_elt,
        l_offset=l_full_offset[slice_train],
        verbose=False,
        initial_pause=d['initial_pause'],
        pause=d['pause'],
        act_time=d['act_time'])

    # Reservoir, read-out and the flow chaining them.
    reservoir = Oger.nodes.LeakyReservoirNode(output_dim=N,
                                              spectral_radius=sr,
                                              input_scaling=iss,
                                              nonlin_func=np.tanh,
                                              leak_rate=leak)
    read_out = mdp.nodes.LinearRegressionNode(use_pinv=True, with_bias=True)
    flow = mdp.Flow([reservoir, read_out])
    if keep_internal_states:
        Oger.utils.make_inspectable(mdp.Flow)

    # Training (the training inputs are also used as test inputs here).
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Train and test")
    if not fast:
        (states_out_train, internal_states_train, internal_outputs_train, neuron_states_train) = \
            common._teach_and_test_flow(inputs_train_set=stim_sent_train, teacher_outputs_train_set=stim_mean_train, inputs_test_set=stim_sent_train, _flow=flow, _reservoir=reservoir, keep_internal_states=keep_internal_states)
    else:
        raise Exception("have to define what to do for fast mode")

    # Testing on the test sentences.
    (states_out_test, internal_states_test, internal_outputs_test, neuron_states_test) = \
        common._test_flow(inputs_test_set=stim_sent_test, _flow=flow, _reservoir=reservoir, keep_internal_states=keep_internal_states)

    if verbose:
        for i in range(len(stim_mean_train)):
            print("len(stim_mean_train) %s" % len(stim_mean_train))
            print("len(l_meaning_code_train) %s" % len(l_meaning_code_train))
            print(l_meaning_code_train[i])
            print(stim_mean_train[0] == stim_mean_train[i])
            print(l_meaning_code_train[0] == l_meaning_code_train[i])

    # Converting the output activity into meanings.
    l_recovered_meaning_test = convert_l_output_activity_in_meaning(
        l_out_act=states_out_test,
        l_ocw_array=l_ocw_array_test,
        l_m_elt=l_m_elt)

    if verbose:
        print("l_recovered_meaning_test %s" % (l_recovered_meaning_test,))
    l_final_mean_test = []
    for meanings in l_recovered_meaning_test:
        if verbose:
            print("meanings %s" % (meanings,))
        # Words of one meaning are joined by ' ', meanings by ','.
        l_final_mean_test.append(
            ",".join(" ".join(meaning) for meaning in meanings))
    print("")
    print("**********************************************")
    print(" *** RECOGNIZED MEANINGS *** ")
    for elt in l_final_mean_test:
        print(str(elt))
    print("**********************************************")

    # Writing the recovered meanings to the output file, one per line.
    print(" *** Writting to output file ... *** ")
    common.write_list_in_file(l=l_final_mean_test, file_path=path_file_out)

    print(" *** ... Writting done ***")
    print("**********************************************")

    # Plotting.
    if plot:
        print(" *** Plotting to output file ... *** ")
        import plotting as plotting
        plotting.plot_array_in_file(
            root_file_name="../RES_TEMP/states_out_train",
            array_=states_out_train,
            titles_subset=l_meaning_code_train,
            legend_=None,
            plot_slice=None,
            title="",
            subtitle="")
        plotting.plot_array_in_file(
            root_file_name="../RES_TEMP/states_out_train_sent",
            array_=states_out_train,
            titles_subset=train_meaning,
            legend_=None,
            plot_slice=None,
            title="",
            subtitle="")
        plotting.plot_array_in_file(
            root_file_name="../RES_TEMP/states_out_test",
            array_=states_out_test,
            titles_subset=l_final_mean_test,
            legend_=None,
            plot_slice=None,
            title="",
            subtitle="")

        plotting.plot_array_in_file(
            root_file_name="../RES_TEMP/intern_states_test",
            array_=internal_states_test,
            titles_subset=None,
            plot_slice=None,
            title="",
            subtitle="")

        print(" *** ... Plotting to output file done *** ")
        print("**********************************************")

    # Returned at function level so the result does not depend on `plot`.
    return l_final_mean_test
Beispiel #7
0
    def train(self,
              d,
              train_data,
              focus,
              sr=1,
              iss=0.25,
              leak=0.25 / 5.0,
              seed=5,
              plot=False,
              fast=False,
              keep_internal_states=False):
        """Build the training stimuli and train a leaky reservoir with a linear read-out.

        The sentence stimuli are the inputs of the flow and the meaning stimuli
        are its teacher outputs. Debug output is controlled by self.verbose.

        Args:
            d: dict of stimulus parameters; the keys read here are 'act_time',
                'offset', 'pause', 'initial_pause' and 'suppl_pause_at_the_end'.
            train_data: raw training data, handed to self.txt2corpus_and_meaning.
            focus: accepted but not used by this method.
            sr: spectral radius given to the reservoir node.
            iss: input scaling given to the reservoir node.
            leak: leak rate given to the reservoir node.
            seed: seed applied to both the mdp and the numpy random generators;
                None leaves them unseeded.
            plot: accepted but not used by this method.
            fast: accepted but not used by this method.
            keep_internal_states: forwarded to self.teach_and_test_flow and
                returned unchanged.

        Returns:
            The tuple (l_construction_train, stim_full_data_train, flow,
            reservoir, keep_internal_states, l_m_elt, construction_words).
        """
        train_corpus, train_meaning = self.txt2corpus_and_meaning(
            train_txt=train_data)
        if self.verbose:
            # Single-argument print(...) works on Python 2 and 3. The double
            # space reproduces the output of the former `print a, b` form,
            # where the label itself ends with a space.
            print("train_corpus  %s" % (train_corpus,))
            print("train_meaning  %s" % (train_meaning,))

        # Seed both generators before anything random (e.g. the reservoir) is built.
        if seed is not None:
            mdp.numx.random.seed(seed)
            np.random.seed(seed)

        # A construction (as in "construction grammar") is a sentence without
        # its open class words (nouns and verbs); those are replaced by 'X'.
        (l_construction_train, l_ocw_array_train,
         construction_words) = self.get_and_remove_ocw_in_corpus(
             corpus=train_corpus,
             _OCW='X',
             l_closed_class=self.closed_class_words)

        # Sentence stimuli are generated in one call so that every sentence
        # gets the same length.
        l_full_train = l_construction_train
        slice_train = slice(0, len(l_construction_train))
        if self.verbose:
            print("slice_train:  %s" % (slice_train,))
            print("l_construction_train :  %s" % (l_construction_train,))

        (stim_full_data_train,
         l_full_offset_train) = CtIolangcod.generate_stim_input_nodic(
             l_data=l_full_train,
             act_time=d['act_time'],
             subset=None,
             l_input=construction_words,
             l_nr_word=None,
             mult=None,
             full_time=None,
             with_offset=d['offset'],
             pause=d['pause'],
             initial_pause=d['initial_pause'],
             suppl_pause_at_the_end=d['suppl_pause_at_the_end'])

        stim_sent_train = stim_full_data_train[slice_train]

        # Meaning stimuli (teacher signal), as long as the first sentence stimulus.
        l_m_elt = self.get_meaning_coding()

        (stim_mean_train, l_meaning_code_train) = self.generate_meaning_stim(
            l_data=train_meaning,
            l_ocw_array=l_ocw_array_train,
            full_time=stim_sent_train[0].shape[0],
            l_m_elt=l_m_elt,
            l_offset=l_full_offset_train[slice_train],
            initial_pause=d['initial_pause'],
            pause=d['pause'],
            act_time=d['act_time'])

        # Reservoir, read-out and the flow chaining them.
        reservoir = Oger.nodes.LeakyReservoirNode(output_dim=self.nbNeurons,
                                                  spectral_radius=sr,
                                                  input_scaling=iss,
                                                  nonlin_func=np.tanh,
                                                  leak_rate=leak)

        read_out = mdp.nodes.LinearRegressionNode(use_pinv=True,
                                                  with_bias=True)
        flow = mdp.Flow([reservoir, read_out])

        # Train the flow; the training inputs are also used as the test inputs.
        # The returned activities are not needed here, only the trained flow is.
        (states_out_train, internal_states_train, internal_outputs_train,
         neuron_states_train) = self.teach_and_test_flow(
             inputs_train_set=stim_sent_train,
             teacher_outputs_train_set=stim_mean_train,
             inputs_test_set=stim_sent_train,
             _flow=flow,
             _reservoir=reservoir,
             keep_internal_states=keep_internal_states)

        return l_construction_train, stim_full_data_train, flow, reservoir, keep_internal_states, l_m_elt, construction_words
def main(path_file_in, path_file_out, plot=False, fast=False, keep_internal_states=False, verbose=False):
    """
    Train a reservoir on the train sentences of an input file, decode the
    meanings of its test sentences and write them to an output file.

    Sentence stimuli are used as reservoir inputs and meaning stimuli as
    teacher outputs. The stimulus parameters, the reservoir parameters and
    the random seed are fixed inside the function.

    Arguments:
        path_file_in: path handed to common.extract_data_io.
        path_file_out: path of the file the recognized meanings are written
            to (through common.write_list_in_file).
        plot: if True, also plot the read-out activity and the internal
            states through oct2011.plotting.
        fast: not implemented; a true value raises an Exception.
        keep_internal_states: forwarded to the train/test helpers; if True,
            mdp.Flow is made inspectable first.
        verbose: if True, print debugging information.

    Returns:
        The list of recognized meanings, one string per test sentence.

    Raises:
        Exception: if the construction words of the train and test corpora
            differ, or if `fast` is requested.
    """
    import os
    # Make the parent directory importable so that io_language_coding is found.
    current_directory = os.path.dirname(os.path.abspath(__file__))
    parent_directory = os.path.dirname(current_directory)
    sys.path.append(parent_directory)
    import io_language_coding as CtIolangcod

    # Parameters of the stimulus (in a dictionary)
    d = {}
    d['act_time'] = 5
    d['pause'] = True
    d['suppl_pause_at_the_end'] = 1 * d['act_time']
    d['initial_pause'] = False
    d['offset'] = True

    # Parameters for the reservoir
    N = 500
    sr = 1
    iss = 0.25
    leak = 0.25 / float(d['act_time'])

    ## Random parameters
    seed = 5
    if seed is not None:
        mdp.numx.random.seed(seed)
        np.random.seed(seed)

    [train_data_txt, test_data_txt, sent_form_info_test] = common.extract_data_io(path_file=path_file_in)
    train_corpus, train_meaning = common.txt2corpus_and_meaning(train_txt=train_data_txt)
    test_corpus = test_data_txt

    # Making the list of constructions (referring to "construction grammar");
    # a construction is a sentence without its open class words (nouns and verbs).
    (l_construction_train, l_ocw_array_train, construction_words) = common.get_and_remove_ocw_in_corpus(
        corpus=train_corpus, _OCW='X', l_closed_class=get_closed_class_words())
    (l_construction_test, l_ocw_array_test, construction_words_test) = common.get_and_remove_ocw_in_corpus(
        corpus=test_corpus, _OCW='X', l_closed_class=get_closed_class_words())
    if construction_words != construction_words_test:
        raise Exception("The construction words are not the same for the train constructions and the test constructions. So the coding of sentences will be different and should provoque a future problem.")

    #################################################
    ## Generating all the sentence stimulus (train and test together, in
    ## order to have the same length for each sentence)
    #################################################
    nr_train = len(l_construction_train)
    l_full_const = l_construction_train + l_construction_test
    slice_train = slice(0, nr_train)
    slice_test = slice(nr_train, nr_train + len(l_construction_test))

    (stim_full_data, l_full_offset) = CtIolangcod.generate_stim_input_nodic(
        l_data=l_full_const,
        act_time=d['act_time'], subset=None, l_input=construction_words,
        l_nr_word=None, mult=None, full_time=None,
        with_offset=d['offset'], pause=d['pause'], initial_pause=d['initial_pause'],
        suppl_pause_at_the_end=d['suppl_pause_at_the_end'], verbose=False)
    stim_sent_train = stim_full_data[slice_train]
    stim_sent_test = stim_full_data[slice_test]

    #################################################
    ## Generating all the meaning stimulus
    #################################################
    l_m_elt = common.get_meaning_coding(max_nr_ocw=max_nr_ocw, max_nr_actionrelation=max_nr_actionrelation, elt_pred=elt_pred)

    (stim_mean_train, l_meaning_code_train) = common.generate_meaning_stim(
        l_data=train_meaning,
        l_ocw_array=l_ocw_array_train, full_time=stim_sent_train[0].shape[0],
        l_m_elt=l_m_elt, l_offset=l_full_offset[slice_train], verbose=False,
        initial_pause=d['initial_pause'], pause=d['pause'], act_time=d['act_time'])

    ## Defining reservoir, readout and flow
    reservoir = Oger.nodes.LeakyReservoirNode(output_dim=N, spectral_radius=sr, input_scaling=iss, nonlin_func=np.tanh, leak_rate=leak)
    read_out = mdp.nodes.LinearRegressionNode(use_pinv=True, with_bias=True)
    flow = mdp.Flow([reservoir, read_out])
    if keep_internal_states:
        Oger.utils.make_inspectable(mdp.Flow)

    ## Training and testing
    print("Train and test")
    if fast:
        raise Exception("have to define what to do for fast mode")
    ## test set = train set
    (states_out_train, internal_states_train, internal_outputs_train, neuron_states_train) = \
        common._teach_and_test_flow(inputs_train_set=stim_sent_train, teacher_outputs_train_set=stim_mean_train, inputs_test_set=stim_sent_train, _flow=flow, _reservoir=reservoir, keep_internal_states=keep_internal_states)
    ## test set not train set
    (states_out_test, internal_states_test, internal_outputs_test, neuron_states_test) = \
        common._test_flow(inputs_test_set=stim_sent_test, _flow=flow, _reservoir=reservoir, keep_internal_states=keep_internal_states)

    if verbose:
        for i in range(len(stim_mean_train)):
            print("len(stim_mean_train) %s" % (len(stim_mean_train),))
            print("len(l_meaning_code_train) %s" % (len(l_meaning_code_train),))
            print(l_meaning_code_train[i])
            print(stim_mean_train[0] == stim_mean_train[i])
            print(l_meaning_code_train[0] == l_meaning_code_train[i])

    ## Converting the read-out activity into meanings
    l_recovered_meaning_test = convert_l_output_activity_in_meaning(l_out_act=states_out_test, l_ocw_array=l_ocw_array_test, l_m_elt=l_m_elt)

    if verbose:
        print("l_recovered_meaning_test %s" % (l_recovered_meaning_test,))
    l_final_mean_test = []
    for meanings in l_recovered_meaning_test:
        if verbose:
            print("meanings %s" % (meanings,))
        # Words of one meaning are separated by spaces, meanings by commas.
        l_final_mean_test.append(",".join(" ".join(meaning) for meaning in meanings))
    print("")
    print("**********************************************")
    print(" *** RECOGNIZED MEANINGS *** ")
    for elt in l_final_mean_test:
        print(str(elt))
    print("**********************************************")

    ## Writing the recognized meanings to the output file, one per line
    print(" *** Writting to output file ... *** ")
    common.write_list_in_file(l=l_final_mean_test, file_path=path_file_out)
    print(" *** ... Writting done ***")
    print("**********************************************")

    ## Plot
    if plot:
        print(" *** Plotting to output file ... *** ")
        import oct2011.plotting as plotting
        plotting.plot_array_in_file(root_file_name="../../RES_TEMP/states_out_train",
                                    array_=states_out_train, titles_subset=l_meaning_code_train,
                                    legend_=None, plot_slice=None, title="", subtitle="")
        plotting.plot_array_in_file(root_file_name="../../RES_TEMP/states_out_train_sent",
                                    array_=states_out_train, titles_subset=train_meaning,
                                    legend_=None, plot_slice=None, title="", subtitle="")
        plotting.plot_array_in_file(root_file_name="../../RES_TEMP/states_out_test",
                                    array_=states_out_test, titles_subset=l_final_mean_test,
                                    legend_=None, plot_slice=None, title="", subtitle="")

        plotting.plot_array_in_file(root_file_name="../../RES_TEMP/intern_states_test", array_=internal_states_test, titles_subset=None, plot_slice=None, title="", subtitle="")

        print(" *** ... Plotting to output file done *** ")
        print("**********************************************")

    # Return the result whether or not plotting was requested.
    return l_final_mean_test
def main(path_file_in, path_file_out,N=1200, sr=3, iss=0.1, leak=0.1, ridge=10**-1, plot=False, feedback=False, return_result=False, verbose=False):
    """
    Train a reservoir with meaning stimuli and sentence stimuli of the train
    data, produce sentences for the test meanings and write them to a file.

    Arguments:
        path_file_in: path handed to extract_data_io.
        path_file_out: path of the file the produced sentences are written
            to, one per line.
        N, sr, iss, leak: passed positionally to reservoir.Reservoir.
        ridge: currently unused.
        plot: if True, plot the read-out activity through the plotting
            module. Skipped when return_result is True, because the function
            returns before the plotting step.
        feedback: if False (the default) the reservoir is trained and tested
            once; otherwise the thresholded, delayed read-out is appended to
            the input and training is repeated for a fixed number of epochs.
        return_result: if True, return the list of produced sentences.
        verbose: currently unused.

    Returns:
        The list of produced test sentences if return_result is True,
        otherwise None.

    NOTE(review): the sentence_to_meaning flag is hard-coded to False. The
    True path looks incomplete (test_meaning and stim_mean_test are only
    defined on the False path) -- confirm before enabling it.
    """
    def write_list_in_file(l, file=None, file_path=None):
        """
        Write a list in a file with one item per line (like a one column csv).

        If file is given, then it assumes the file is already open for writing.
        If file_path is given, then it opens the file for writing, writes the
        list, and then closes the file.
        """
        opened_here = file_path is not None
        if opened_here:
            if file is not None:
                raise Exception("Too much arguments. You must choose between file and file_path.")
            file = open(file_path, 'w')
        if file is None:
            raise Exception("No file given in input.")

        try:
            for item in l:
                file.write("%s\n" % item)
        finally:
            # Only close what was opened here, even if a write failed.
            if opened_here:
                file.close()

    import copy
    import time
    import io_language_coding as CtIolangcod
    sentence_to_meaning = False

    # Parameters of the stimulus (in a dictionary)
    d = {}
    d['act_time'] = 5
    d['pause'] = True
    d['suppl_pause_at_the_end'] = 1 * d['act_time']
    d['initial_pause'] = True
    d['offset'] = False

    ## Random parameters: the seed is the current time in whole seconds
    seed = int(round(time.time()))
    if seed is not None:
        mdp.numx.random.seed(seed)
        np.random.seed(seed)

    [train_data_txt, test_data_txt, sent_form_info_train, sent_form_info_test] = extract_data_io(path_file=path_file_in)

    train_corpus, train_meaning = txt2corpus_and_meaning(train_txt=train_data_txt)
    if sentence_to_meaning:
        test_corpus = test_data_txt
    else:
        test_meaning = test_data_txt

    # Making the list of constructions (referring to "construction grammar");
    # a construction is a sentence without its open class words (nouns and verbs).
    (l_construction_train, construction_words) = get_and_remove_ocw_in_corpus(corpus=train_corpus, _OCW='X')
    l_ocw_array_train = generate_l_ocw_array(sent_form_info_train, train_meaning)
    l_ocw_array_test = generate_l_ocw_array(sent_form_info_test, test_meaning)
    if sentence_to_meaning:
        (l_construction_test, construction_words_test) = get_and_remove_ocw_in_corpus(corpus=test_corpus, _OCW='X')
        if construction_words != construction_words_test:
            raise Exception("The construction words are not the same for the train constructions and the test constructions. So the coding of sentences will be different and should provoque a future problem.")

    ## Generating all the sentence stimulus (in order to have the same length for each sentence)
    if sentence_to_meaning:
        ## Generate the stimulus input for train and test data
        l_full_const = l_construction_train + l_construction_test
        slice_test = slice(len(l_construction_train), len(l_construction_train) + len(l_construction_test))
    else:
        l_full_const = l_construction_train
    slice_train = slice(0, len(l_construction_train))
    (stim_full_data, l_full_offset) = CtIolangcod.generate_stim_input_nodic(
        l_data=l_full_const,
        act_time=d['act_time'], subset=None, l_input=construction_words,
        l_nr_word=None, mult=None, full_time=None,
        with_offset=d['offset'], pause=d['pause'], initial_pause=d['initial_pause'],
        suppl_pause_at_the_end=d['suppl_pause_at_the_end'], verbose=False)
    stim_sent_train = stim_full_data[slice_train]
    if sentence_to_meaning:
        stim_sent_test = stim_full_data[slice_test]

    ## Generating the meaning stimulus
    l_m_elt = get_meaning_coding()

    (stim_mean_train, l_meaning_code_train) = generate_meaning_stim(l_structure=sent_form_info_train, full_time=stim_sent_train[0].shape[0], l_m_elt=l_m_elt)

    if not sentence_to_meaning:
        (stim_mean_test, l_meaning_code_test) = generate_meaning_stim(l_structure=sent_form_info_test, full_time=stim_sent_train[0].shape[0], l_m_elt=l_m_elt)

    # Reservoir and read-out definitions
    res = reservoir.Reservoir(N, sr, iss, leak)

    # `== False` is kept on purpose: `not feedback` would route a None
    # argument to the classic branch instead of the feedback branch.
    if feedback == False:
        # Classic working of the reservoir
        ## test set = train set
        states_out_train, internal_states_train = res.train(stim_mean_train, stim_sent_train)
        ## test set not train set
        states_out_test, internal_states_test = res.test(stim_mean_test)
    else:
        # Feedback working of the reservoir.
        # !! Should be implemented directly in the reservoir class !!
        delay = 1
        nb_epoch_max = 4
        dim_input = stim_mean_train[0].shape[1]

        # Initial training input: meaning stimulus concatenated with the
        # teacher sentence stimulus shifted by `delay` zero rows.
        input_train = []
        for (x, y) in zip(np.copy(stim_mean_train), np.copy(stim_sent_train)):
            for time_step_delay in range(delay):
                y = np.concatenate(([[0.] * len(y[0])], y), axis=0)
            input_train.append(np.array(np.concatenate((x, y[:-delay]), axis=1)))

        feedback_signal = None
        nb_train = 0
        while nb_train < nb_epoch_max:
            ## test set = train set
            states_out_train, internal_states_train = res.train(input_train, stim_sent_train)

            for num_phrase in range(len(states_out_train)):
                # Threshold the read-out signal
                states_out_train[num_phrase] = np.array([treshold_signal(signal_t, 1.5, -0.5) for signal_t in states_out_train[num_phrase]])
                if nb_train == 0:
                    # The feedback is only recomputed during the first epoch.
                    # NOTE(review): in later epochs every sentence therefore
                    # receives the feedback computed for the last sentence of
                    # the first epoch -- confirm this is intended.
                    feedback_signal = np.array(states_out_train[num_phrase])
                    # Delay the signal by prepending zero rows
                    for time_step_delay in range(delay):
                        feedback_signal = np.concatenate(([[0.] * len(feedback_signal[0])], feedback_signal), axis=0)

                # Overwrite the feedback part of the input (the columns after
                # the meaning part) with the delayed read-out.
                input_train[num_phrase] = input_train[num_phrase].T
                input_train[num_phrase][dim_input:] = feedback_signal[:-delay].T
                input_train[num_phrase] = input_train[num_phrase].T

            nb_train += 1

        ## test set not train set: run the test one time step at a time and
        ## feed the previous read-out back as part of the next input
        for t in range(0, stim_mean_test[0].shape[0], 1):
            input_test = []
            if t == 0:  # TODO: to be reworked
                # No read-out exists yet: the feedback part is all zeros.
                for n_phrase in range(len(stim_mean_test)):
                    input_test.append(np.concatenate((stim_mean_test[n_phrase][t:t+1, :], [[0.] * len(stim_sent_train[0][0])]), axis=1))

                states_out_test, internal_states_test = res.test(input_test)
                states_out_test_def = copy.deepcopy(states_out_test)
            else:
                for n_phrase in range(len(stim_mean_test)):
                    # Feedback assignment
                    feedback_signal = np.array(states_out_test[n_phrase])
                    input_test.append(np.concatenate((stim_mean_test[n_phrase][t:t+1, :], feedback_signal), axis=1))

                states_out_test, internal_states_test = res.test(input_test)

                # Accumulate the read-out of this time step
                for n_phrase in range(len(stim_mean_test)):
                    states_out_test_def[n_phrase] = np.concatenate((states_out_test_def[n_phrase], states_out_test[n_phrase]), axis=0)

        states_out_test = states_out_test_def

    # Building the answer sentences
    l_recovered_construction_train = convert_l_output_activity_in_construction(l_out_act=states_out_train,
                                                                               construction_words=construction_words,
                                                                               min_nr_of_val_upper_thres=1)
    l_recovered_sentences_train = attribute_ocw_to_constructions(l_constructions=l_recovered_construction_train,
                                                                 l_ocw_array=l_ocw_array_train, _OCW='X')

    l_recovered_construction_test = convert_l_output_activity_in_construction(l_out_act=states_out_test,
                                                                              construction_words=construction_words,
                                                                              min_nr_of_val_upper_thres=2)
    l_recovered_sentences_test = attribute_ocw_to_constructions(l_constructions=l_recovered_construction_test,
                                                                l_ocw_array=l_ocw_array_test, _OCW='X')

    ## Writing sentences to output file
    l_final_sent_test = [" ".join(list_words) for list_words in l_recovered_sentences_test]

    print("********************************************** ")
    print(" *** RECOGNIZED SENTENCES *** ")
    # Guard against an empty test set instead of failing with IndexError.
    if l_final_sent_test:
        print(l_final_sent_test[0])

    write_list_in_file(l=l_final_sent_test, file_path=path_file_out)
    if return_result:
        return l_final_sent_test

    ## Plot
    if plot:
        import plotting as plotting

        plotting.plot_array_in_file(root_file_name="../Results/states_out_train", array_=states_out_train, titles_subset=l_construction_train, legend_=construction_words, plot_slice=None, title="", subtitle="")

        plotting.plot_array_in_file(root_file_name="../Results/states_out_test", array_=states_out_test, titles_subset=l_recovered_sentences_test, legend_=construction_words, plot_slice=None, title="", subtitle="")

    print("")
Beispiel #10
0
def main(path_file_in, path_file_out, plot=False, keep_internal_states=False, verbose=False):
    """
    Train a reservoir with meaning stimuli as inputs and sentence stimuli as
    teacher outputs, produce sentences for the test meanings and write them
    to a file.

    The stimulus parameters, the reservoir parameters and the random seed are
    fixed inside the function.

    Arguments:
        path_file_in: path handed to extract_data_io.
        path_file_out: path of the file the produced sentences are written
            to, one per line.
        plot: if True, also plot the read-out activity and the internal
            states through oct2011.plotting.
        keep_internal_states: forwarded to the train/test helpers; if True,
            mdp.Flow is made inspectable first.
        verbose: currently unused; the debugging output is always printed.

    Raises:
        Exception: from the nested file writer on inconsistent arguments, or
            (sentence_to_meaning path only) if the construction words of the
            train and test corpora differ.

    NOTE(review): the sentence_to_meaning flag is hard-coded to False. The
    True path looks incomplete (stim_mean_test is only defined on the False
    path) -- confirm before enabling it.
    """
    def write_list_in_file(l, file=None, file_path=None):
        """
        Write a list in a file with one item per line (like a one column csv).

        If file is given, then it assumes the file is already open for writing.
        If file_path is given, then it opens the file for writing, writes the
        list, and then closes the file.
        """
        opened_here = file_path is not None
        if opened_here:
            if file is not None:
                raise Exception("Too much arguments. You must choose between file and file_path.")
            file = open(file_path, 'wb')
        if file is None:
            raise Exception("No file given in input.")

        try:
            for item in l:
                file.write("%s\n" % item)
        finally:
            # Only close what was opened here, even if a write failed.
            if opened_here:
                file.close()

    import sys
    sys.path.append("../Common_Tools")
    print(sys.path)
    import io_language_coding as CtIolangcod

    sys.path.append("../iCub_language")

    sentence_to_meaning = False

    # Parameters of the stimulus (in a dictionary)
    d = {}
    d['act_time'] = 5
    d['pause'] = True
    d['suppl_pause_at_the_end'] = 1 * d['act_time']
    d['initial_pause'] = True
    d['offset'] = False

    # Parameters for the reservoir
    N = 500
    sr = 2
    iss = 0.01
    leak = 0.75

    ## Random parameters
    # seed 2 works with 2 sentences: both with 1 relation, 1 canonical, 1 non-canonical
    seed = 5
    if seed is not None:
        mdp.numx.random.seed(seed)
        np.random.seed(seed)

    [train_data_txt, test_data_txt, sent_form_info_test] = extract_data_io(path_file=path_file_in, sentence_to_meaning=sentence_to_meaning)
    print("**************************")
    print("train data_txt %s" % (train_data_txt,))
    print("test data_txt %s" % (test_data_txt,))
    print("sent_form_info_test %s" % (sent_form_info_test,))
    train_corpus, train_meaning = txt2corpus_and_meaning(train_txt=train_data_txt)
    if sentence_to_meaning:
        test_corpus = test_data_txt
    else:
        test_meaning = test_data_txt

    # Making the list of constructions (referring to "construction grammar");
    # a construction is a sentence without its open class words (nouns and verbs).
    (l_construction_train, l_ocw_array_train, construction_words) = get_and_remove_ocw_in_corpus(corpus=train_corpus, _OCW='X')
    print("**************************")
    print("l_construction_train %s" % (l_construction_train,))
    print("l_ocw_array_train %s" % (l_ocw_array_train,))
    if sentence_to_meaning:
        (l_construction_test, l_ocw_array_test, construction_words_test) = get_and_remove_ocw_in_corpus(corpus=test_corpus, _OCW='X')
        print("l_construction_test %s" % (l_construction_test,))
        if construction_words != construction_words_test:
            raise Exception("The construction words are not the same for the train constructions and the test constructions. So the coding of sentences will be different and should provoque a future problem.")
    else:
        # Check if a special form of sentence is requested (canonical or
        # non-canonical form), i.e. if at least one element is not None.
        print("")
        print("*** Managing sentence form ... ***")
        print("sent_form_info_test: %s" % (sent_form_info_test,))
        if all(elt is None for elt in sent_form_info_test):
            # No sentence form information at all: generate the default form
            l_ocw_array_test = generate_l_ocw_array_in_canonical_order(l_meaning=test_meaning)
        else:
            # At least one form is specified: use the requested order of
            # each meaning in the list
            l_ocw_array_test = generate_l_ocw_array_in_specified_order(l_meaning=test_meaning, l_sent_form=sent_form_info_test)
        print("*** ... sentence form managed ***")
    print("l_ocw_array_test %s" % (l_ocw_array_test,))

    ## Generating all the sentence stimulus (in order to have the same length for each sentence)
    if sentence_to_meaning:
        ## Generate the stimulus input for train and test data
        l_full_const = l_construction_train + l_construction_test
        slice_test = slice(len(l_construction_train), len(l_construction_train) + len(l_construction_test))
        print("slice_test %s" % (slice_test,))
    else:
        l_full_const = l_construction_train
    slice_train = slice(0, len(l_construction_train))
    (stim_full_data, l_full_offset) = CtIolangcod.generate_stim_input_nodic(
        l_data=l_full_const,
        act_time=d['act_time'], subset=None, l_input=construction_words,
        l_nr_word=None, mult=None, full_time=None,
        with_offset=d['offset'], pause=d['pause'], initial_pause=d['initial_pause'],
        suppl_pause_at_the_end=d['suppl_pause_at_the_end'], verbose=False)
    stim_sent_train = stim_full_data[slice_train]
    if sentence_to_meaning:
        stim_sent_test = stim_full_data[slice_test]

    print("stim_sent_train[0].shape %s" % (stim_sent_train[0].shape,))
    print("stim_sent_train[0].shape[0] %s" % (stim_sent_train[0].shape[0],))

    ## Generating the meaning stimulus
    l_m_elt = get_meaning_coding()
    print("")
    print("*** Generating meaning for train set ... ***")
    (stim_mean_train, l_meaning_code_train) = generate_meaning_stim(l_data=train_meaning, l_ocw_array=l_ocw_array_train, full_time=stim_sent_train[0].shape[0], l_m_elt=l_m_elt, verbose=False)
    print("*** ... meaning generated for train set ***")
    print("l_m_elt %s" % (l_m_elt,))
    print("stim_mean_train[0].shape %s" % (stim_mean_train[0].shape,))
    print("l_meaning_code_train %s" % (l_meaning_code_train,))
    print("")
    if not sentence_to_meaning:
        print("*** Generating meaning for test set ... ***")
        (stim_mean_test, l_meaning_code_test) = generate_meaning_stim(l_data=test_meaning, l_ocw_array=l_ocw_array_test, full_time=stim_sent_train[0].shape[0], l_m_elt=l_m_elt, verbose=False)
        print("*** ... meaning generated for test set ***")
        print("")

    ## Defining reservoir, readout and flow
    reservoir = Oger.nodes.LeakyReservoirNode(output_dim=N, spectral_radius=sr, input_scaling=iss, nonlin_func=np.tanh, leak_rate=leak)
    read_out = mdp.nodes.LinearRegressionNode(use_pinv=True, with_bias=True)
    flow = mdp.Flow([reservoir, read_out])
    if keep_internal_states:
        Oger.utils.make_inspectable(mdp.Flow)

    print("Train and test")
    ## test set = train set
    (states_out_train, internal_states_train, internal_outputs_train, neuron_states_train) = \
        _teach_and_test_flow(inputs_train_set=stim_mean_train, teacher_outputs_train_set=stim_sent_train, inputs_test_set=stim_mean_train, _flow=flow, _reservoir=reservoir, keep_internal_states=keep_internal_states)
    ## test set not train set
    (states_out_test, internal_states_test, internal_outputs_test, neuron_states_test) = \
        _test_flow(inputs_test_set=stim_mean_test, _flow=flow, _reservoir=reservoir, keep_internal_states=keep_internal_states)

    # Debugging output: compare every train meaning with the first one.
    for i in range(len(stim_mean_train)):
        print("len(stim_mean_train) %s" % (len(stim_mean_train),))
        print("len(l_meaning_code_train) %s" % (len(l_meaning_code_train),))
        print(l_meaning_code_train[i])
        print(stim_mean_train[0] == stim_mean_train[i])
        print(l_meaning_code_train[0] == l_meaning_code_train[i])

    # Building the answer sentences
    print("")
    print("**********************************************")
    print("*** Processing recovery of train sentences ...")
    l_recovered_construction_train = convert_l_output_activity_in_construction(l_out_act=states_out_train,
                                                                               construction_words=construction_words,
                                                                               min_nr_of_val_upper_thres=1)
    l_recovered_sentences_train = attribute_ocw_to_constructions(l_constructions=l_recovered_construction_train,
                                                                 l_ocw_array=l_ocw_array_train, _OCW='X')
    print("*** l_recovered_sentences_train: ***")
    for s in l_recovered_sentences_train:
        print(s)
    print("**********************************************")
    print("")
    print("**********************************************")
    print("*** Processing recovery of test sentences ...")
    l_recovered_construction_test = convert_l_output_activity_in_construction(l_out_act=states_out_test,
                                                                              construction_words=construction_words,
                                                                              min_nr_of_val_upper_thres=2)
    l_recovered_sentences_test = attribute_ocw_to_constructions(l_constructions=l_recovered_construction_test,
                                                                l_ocw_array=l_ocw_array_test, _OCW='X')
    print("*** l_recovered_sentences_test: ***")
    for s in l_recovered_sentences_test:
        print(s)
    print("**********************************************")

    ## Writing sentences to output file, one produced test sentence per line
    print(" *** Writting to output file ... *** ")
    l_final_sent_test = [" ".join(list_words) for list_words in l_recovered_sentences_test]
    write_list_in_file(l=l_final_sent_test, file_path=path_file_out)
    print(" *** ... Writting done ***")
    print("**********************************************")

    ## Plot
    if plot:
        print(" *** Plotting to output file ... *** ")
        import oct2011.plotting as plotting
        ## Plot read-out activity
        plotting.plot_array_in_file(root_file_name="../../RES_TEMP/states_out_train_recov", array_=states_out_train, titles_subset=l_recovered_sentences_train, legend_=construction_words, plot_slice=None, title="", subtitle="")
        plotting.plot_array_in_file(root_file_name="../../RES_TEMP/states_out_test", array_=states_out_test, titles_subset=l_recovered_sentences_test, legend_=construction_words, plot_slice=None, title="", subtitle="")

        ## Plot internal states
        plotting.plot_array_in_file(root_file_name="../../RES_TEMP/intern_states_test", array_=internal_states_test, titles_subset=l_ocw_array_test, plot_slice=None, title="", subtitle="")
        print(" *** ... Plotting to output file done *** ")
        print("**********************************************")
Beispiel #11
0
    def train(self,
              sent_form_info_train,
              train_meaning,
              train_corpus,
              d,
              sr=3,
              iss=0.1,
              leak=0.1,
              ridge=10**-1,
              seed=None):
        """Build the training stimuli and train a fresh reservoir on them.

        The sentence stimuli are generated from ``train_corpus`` (with its
        open class words replaced by the ``'X'`` marker), the meaning stimuli
        from ``sent_form_info_train``, and a new ``reservoir.Reservoir`` is
        trained with the meaning stimuli as first argument and the sentence
        stimuli as second argument.

        Parameters:
            sent_form_info_train: forwarded to ``self.generate_l_ocw_array``
                and, as ``l_structure``, to ``self.generate_meaning_stim``.
            train_meaning: forwarded to ``self.generate_l_ocw_array``.
            train_corpus: corpus handed to
                ``self.get_and_remove_ocw_in_corpus``.
            d: mapping providing the keys ``'act_time'``, ``'offset'``,
                ``'pause'``, ``'initial_pause'`` and
                ``'suppl_pause_at_the_end'`` used to generate the stimuli.
            sr, iss, leak: forwarded positionally, in that order, to
                ``reservoir.Reservoir`` after ``self.iNbNeurons``
                (presumably spectral radius, input scaling and leak rate --
                TODO confirm against the Reservoir class).
            ridge: NOTE(review): accepted but never used by this method; kept
                so existing callers that pass it keep working.
            seed: optional integer seed for the random generators. When
                ``None`` (the default) the current wall-clock time in whole
                seconds is used, as before, so successive runs differ.
                Pass a fixed value to make a run reproducible.

        Returns:
            The tuple ``(l_ocw_array_train, states_out_train,
            construction_words, internal_states_train, res, stim_mean_train,
            stim_sent_train, l_m_elt)``.
        """
        # Imported locally, as in the original code, so that the module-level
        # imports of the file are left untouched.
        import io_language_coding as CtIolangcod
        import time

        ## Random parameters
        if seed is None:
            # time.time() is expressed in seconds (not milliseconds).
            seed = int(round(time.time()))
        mdp.numx.random.seed(seed)
        np.random.seed(seed)

        # making the list of constructions (referring to "construction
        # grammar"); a construction is a sentence without its open class
        # words (Nouns and Verbs)
        (l_construction_train,
         construction_words) = self.get_and_remove_ocw_in_corpus(
             corpus=train_corpus, _OCW='X')
        l_ocw_array_train = self.generate_l_ocw_array(sent_form_info_train,
                                                      train_meaning)

        ## Sentence stimuli (the offsets returned alongside are not needed)
        (stim_full_data,
         _l_full_offset) = CtIolangcod.generate_stim_input_nodic(
             l_data=l_construction_train,
             act_time=d['act_time'],
             subset=None,
             l_input=construction_words,
             l_nr_word=None,
             mult=None,
             full_time=None,
             with_offset=d['offset'],
             pause=d['pause'],
             initial_pause=d['initial_pause'],
             suppl_pause_at_the_end=d['suppl_pause_at_the_end'],
             verbose=False)
        # Every generated stimulus belongs to the training set here.
        slice_train = slice(0, len(l_construction_train))
        stim_sent_train = stim_full_data[slice_train]

        ## Meaning stimuli
        l_m_elt = self.get_meaning_coding(
            max_nr_ocw=self.imax_nr_ocw,
            max_nr_actionrelation=self.imax_nr_actionrelation,
            elt_pred=self.l_elt_pred)

        # The meaning stimuli get the same number of time steps as the first
        # sentence stimulus; the meaning codes returned alongside are unused.
        (stim_mean_train, _l_meaning_code_train) = self.generate_meaning_stim(
            l_structure=sent_form_info_train,
            full_time=stim_sent_train[0].shape[0],
            l_m_elt=l_m_elt)

        # Reservoir and Read-out definitions
        res = reservoir.Reservoir(self.iNbNeurons, sr, iss, leak)

        # classic working of the reservoir without feedback
        states_out_train, internal_states_train = res.train(
            stim_mean_train, stim_sent_train)

        return (l_ocw_array_train, states_out_train, construction_words,
                internal_states_train, res, stim_mean_train, stim_sent_train,
                l_m_elt)