Example #1
0
 def generate_vector(self, sentence_id=None):
     """Build the feature vector of every token in one sentence.

     The file reader is moved to ``sentence_id``, each token of that
     sentence is turned into one vector by concatenating the vectors
     returned by every configured (non-``None``) reader, and the reader is
     then moved back to the sentence it pointed at on entry.

     Args:
         sentence_id: Identifier handed to
             ``self.file_reader.set_current_sentence``.

     Returns:
         dict mapping each token's ``utils.TID`` value to its concatenated
         vector.

     Raises:
         exp.noneValueError: If every configured reader is ``None`` or the
             reader yields ``None`` as the sentence.
         TypeError: If a configured value is not a ``base.vectorReader``,
             or the sentence is not a list of ``base.annotatedString``.
     """
     previous_id = self.file_reader.get_current_sentence_id()
     configured = self.vector_configuration
     if all(reader is None for reader in configured.values()):
         raise exp.noneValueError(
             'All values of vector configuration is "None"')
     if any(reader is not None and not isinstance(reader, base.vectorReader)
            for reader in configured.values()):
         raise TypeError(
             'Invalid vector configuration value found.\nFound: {}'.format(
                 configured))
     # Sorted so the concatenation order is the same for every token.
     vector_keys = sorted(
         k for k in configured.keys() if configured.get(k) is not None)
     self.file_reader.set_current_sentence(sentence_id)
     try:
         sentence = self.file_reader.get_current_sentence()
         if sentence is None:
             raise exp.noneValueError('Sentence cannot be "None"')
         if not isinstance(sentence, list):
             raise TypeError('Sentence must be a list.\nFound: {}'.format(
                 type(sentence)))
         if not all(isinstance(tok, base.annotatedString)
                    for tok in sentence):
             # The offending types are formatted into the message; passing
             # them as a second exception argument left "{}" unfilled.
             raise TypeError(
                 'Sentence must be a list of "annotatedString".\n'
                 'Found: {}'.format([type(tok) for tok in sentence]))
         sentence_map = {}
         for tok in sentence:
             parts = [
                 configured.get(key).get_vector(tok.getValue(key))
                 for key in vector_keys
             ]
             sentence_map[tok.getValue(utils.TID)] = npcat(parts)
         if self.vector_dimension is None and sentence_map:
             # Any token will do: do not assume a token with id 1 exists.
             self.vector_dimension = len(next(iter(sentence_map.values())))
         return sentence_map
     finally:
         # Put the reader back where the caller left it, also on failure.
         self.file_reader.set_current_sentence(previous_id)
Example #2
0
 def vectorize(self):
     """Vectorize every sentence of the file reader into ``self.sentence_map``.

     The reader is reset, then for each sentence a dict mapping every
     token's ``utils.TID`` value to the concatenation of the vectors from
     all configured (non-``None``) readers is stored in
     ``self.sentence_map`` under the reader's current sentence id.
     Iteration stops when ``get_next_sentence`` raises
     ``exp.lastElementWarning``.

     Raises:
         exp.noneValueError: If every configured reader is ``None`` or a
             sentence is ``None``.
         TypeError: If a configured value is not a ``base.vectorReader``,
             or a sentence is not a list of ``base.annotatedString``.
     """
     self.file_reader.reset()
     configured = self.vector_configuration
     if all(reader is None for reader in configured.values()):
         raise exp.noneValueError(
             'All values of vector configuration is "None"')
     if any(reader is not None and not isinstance(reader, base.vectorReader)
            for reader in configured.values()):
         raise TypeError(
             'Invalid vector configuration value found.\nFound: {}'.format(
                 configured))
     # Resolve the active readers once, in sorted key order, instead of
     # looking each one up again for every token.
     active_readers = [
         (key, configured.get(key))
         for key in sorted(configured.keys())
         if configured.get(key) is not None
     ]
     sentence = self.file_reader.get_current_sentence()
     while True:
         if sentence is None:
             raise exp.noneValueError('Sentence cannot be "None"')
         if not isinstance(sentence, list):
             raise TypeError('Sentence must be a list')
         if not all(isinstance(tok, base.annotatedString)
                    for tok in sentence):
             raise TypeError('Sentence must be a list of "annotatedString"')
         token_vectors = {}
         for tok in sentence:
             joined = npcat([
                 reader.get_vector(tok.getValue(key))
                 for key, reader in active_readers
             ])
             token_vectors[tok.getValue(utils.TID)] = joined
         self.sentence_map[
             self.file_reader.get_current_sentence_id()] = token_vectors
         try:
             sentence = self.file_reader.get_next_sentence()
         except exp.lastElementWarning:
             # The reader signals the end of its data with this exception.
             break
Example #3
0
 def get_vector(self, key=None):
     """Return the concatenated one-hot encoding described by ``key``.

     One segment is produced per entry of ``self.classes``, in that order,
     with length ``self.dimension.get(cls)``. A segment is all zeros unless
     ``key`` names that class, in which case the position of the requested
     value within ``self.elements.get(cls)`` is set to 1.0.

     Args:
         key: dict mapping class name to the value to encode for that
             class. Classes missing from ``key`` stay all-zero.

     Returns:
         The segments joined into a single vector.

     Raises:
         exp.noneValueError: If ``key`` is ``None``.
         TypeError: If ``key`` is not a dict.
         KeyError: If ``key`` names an unknown class or an unknown value
             for a class.
     """
     # Identity test: "== None" is evaluated element-wise by array-like
     # keys and would fail before the intended TypeError below is reached.
     if key is None:
         raise exp.noneValueError('Vector search key cannot be "None"')
     if not isinstance(key, dict):
         raise TypeError(
             'Vector search key must be a dict object.\nFound: <{}>'.format(
                 type(key)))
     for cls_name, value in key.items():
         if cls_name not in self.classes:
             raise KeyError(
                 'Class doesnot exist in the vocabulary.\nFound: {}'.format(
                     cls_name))
         if value not in self.elements.get(cls_name):
             raise KeyError(
                 'A value for the class::{} doesnot exist.\nFound: {}'.
                 format(cls_name, value))
     segments = []
     for cls_name in self.classes:
         segment = npzeros(self.dimension.get(cls_name))
         if cls_name in key:
             hot = self.elements.get(cls_name).index(key.get(cls_name))
             segment[hot] = 1.0
         segments.append(segment)
     return npcat(segments)
Example #4
0
 def update_cell(self, coords, val):
     """Store ``val`` in the cell of ``self.m`` addressed by ``coords``.

     Args:
         coords: ``(row, column)`` index pair; negative indices count from
             the end as usual.
         val: Value to write into the cell.

     Raises:
         IndexError: If either index is out of range.
     """
     # Direct element assignment replaces the slice-and-reconcatenate
     # approach, which duplicated rows when the row index was -1 (the
     # "rows after" slice became the whole matrix) and depended on ``.A1``
     # returning a view. The matrix is updated in place.
     row_index, col_index = coords[0], coords[1]
     self.m[row_index, col_index] = val
Example #5
0
 def populate_spiral(self):
     """Wrap ``self.m`` in a one-cell-wide border of zeros.

     A zero row is added on top, then a zero column on the right, a zero
     column on the left and finally a zero row at the bottom; the enlarged
     matrix replaces ``self.m``.
     """
     grid = self.m
     # Top border: one zero per column of the first row.
     top_border = matrix([[0] * grid[0].shape[1]])
     grid = npcat((top_border, grid), axis=0)
     # Side borders: one zero per row of the grid as it stands at that point.
     right_border = matrix([[0]] * grid.shape[0])
     grid = npcat((grid, right_border), axis=1)
     left_border = matrix([[0]] * grid.shape[0])
     grid = npcat((left_border, grid), axis=1)
     # Bottom border spans the widened grid.
     bottom_border = matrix([[0] * grid[0].shape[1]])
     grid = npcat((grid, bottom_border), axis=0)
     self.m = grid
Example #6
0
 def get_null_vector(self):
     """Return an all-zero vector covering every class in ``self.classes``.

     Each class contributes ``self.dimension.get(cls)`` zeros, in the order
     the classes are listed.
     """
     zero_segments = []
     for cls_name in self.classes:
         zero_segments.append(npzeros(self.dimension.get(cls_name)))
     return npcat(zero_segments)
Example #7
0
    def __populate_data_metrix(self):
        """Fill the input and output data matrices with sliding-window data.

        The input reader is reset and every sentence is processed in turn
        until ``get_next_sentence`` raises ``exp.lastElementWarning``.

        Input side: the token vectors returned by
        ``self.vector_reader.generate_vector`` are ordered by key, padded
        with ``window_width - 1`` null vectors on each side, and every run
        of ``window_width`` consecutive vectors is concatenated and stored
        in ``self.input_data_matrix[(sentence id, window index)]``.

        Output side: the sentence is padded the same way with
        ``utils.NULL``. For each window, every slot contributes
        ``window_width + 3`` relation vectors, all null except (for a real
        token) the one position that encodes where the token's head lies:

        * index 0 - the head id is 0,
        * index 1 - the head lies before the window,
        * index 2 - the head lies at or after the window's end,
        * index 3 + offset - the head is inside the window at ``offset``.

        The concatenation of all slots is stored in
        ``self.output_data_matrix[(sentence id, window index)]``.

        Raises:
            KeyError: If a token's head id matches no token in the sentence.
        """
        self.input_reader.reset()
        curSentence = self.input_reader.get_current_sentence()
        while True:
            # The string below is a disabled debug print kept as a no-op
            # expression statement.
            """
            print '>>SENT[', self.input_reader.get_current_sentence_id(), ']',  len(curSentence)
            print curSentence
            print
            """
            # input data generation
            sentID = self.input_reader.get_current_sentence_id()
            # Sorts the list object returned by the reader in place.
            # NOTE(review): if the reader hands out its internal list, this
            # (and the NULL padding further down) alters the reader's own
            # data - confirm.
            curSentence.sort(key=lambda x: x.getValue(utils.TID))
            # input generation
            vectorMap = self.vector_reader.generate_vector(sentID)
            # Order the token vectors by their key in vectorMap.
            inputVectors = [vectorMap.get(k) for k in sorted(vectorMap.keys())]
            #inputVectors = [self.vector_reader.get_vector(self.input_reader.get_current_sentence_id(), t.getValue(utils.TID)) for t in curSentence]
            # Pad both ends with window_width - 1 null vectors so that every
            # window position overlapping the sentence can be formed.
            for i in range(self.window_width - 1):
                inputVectors[:0] = [self.vector_reader.get_null_vector()]
                inputVectors.append(self.vector_reader.get_null_vector())
            # Slide a window of window_width vectors over the padded list;
            # counter numbers the windows from 0.
            startIndex = 0
            endIndex = self.window_width
            counter = 0
            while True:
                #print [inputVectors[i] for i in range(startIndex, endIndex)]
                dataPoint = npcat(
                    [inputVectors[i] for i in range(startIndex, endIndex)])
                self.input_data_matrix[(sentID, counter)] = dataPoint
                # Stop once the window has reached the end of the padded list.
                if endIndex == len(inputVectors):
                    break
                else:
                    startIndex += 1
                    endIndex += 1
                    counter += 1

            # Output generation
            # Pad the sentence itself the same way, using the NULL marker.
            for i in range(self.window_width - 1):
                curSentence[:0] = [utils.NULL]
                curSentence.append(utils.NULL)

            startIndex = 0
            endIndex = self.window_width
            counter = 0
            while True:
                # One list of window_width + 3 relation vectors per slot.
                dataPoint = []
                for e in curSentence[startIndex:endIndex]:
                    if e == utils.NULL:
                        # Padding slot: every position is the null vector.
                        dataPoint.append(
                            [self.relation_reader.get_null_vector()] *
                            (self.window_width + 3))
                    else:
                        hid = e.getValue(utils.RELATION_HEAD)
                        # Start all-null; exactly one entry is replaced
                        # below. Entries are reassigned, never mutated, so
                        # sharing one null vector object across the list is
                        # harmless here.
                        rmap = [self.relation_reader.get_null_vector()
                                ] * (self.window_width + 3)
                        rel = self.relation_reader.get_vector(
                            key=e.getValue(utils.RELATION))
                        if hid == 0:
                            # Head id 0 is encoded at position 0.
                            rmap[0] = rel
                            dataPoint.append(rmap)
                        else:
                            # Position of the head token inside the padded
                            # sentence, or -1 when no token has that id.
                            hIndex = next(
                                (j for j, item in enumerate(curSentence)
                                 if item != utils.NULL
                                 and item.getValue(utils.TID) == hid), -1)
                            if hIndex == -1:
                                raise KeyError('Invalid token ID found')
                            elif hIndex < startIndex:
                                # Head lies before the window.
                                rmap[1] = rel
                                dataPoint.append(rmap)
                            elif hIndex >= endIndex:
                                # Head lies at or after the window's end.
                                rmap[2] = rel
                                dataPoint.append(rmap)
                            else:
                                # Head is inside the window: encode its
                                # offset from the window start.
                                rmap[3 + hIndex - startIndex] = rel
                                dataPoint.append(rmap)
                self.output_data_matrix[(sentID, counter)] = npcat(
                    [npcat(e) for e in dataPoint])
                # NOTE(review): the stop condition uses len(inputVectors),
                # not len(curSentence); this presumably relies on
                # generate_vector returning one vector per token so both
                # padded lists have equal length - confirm.
                if endIndex == len(inputVectors):
                    break
                else:
                    startIndex += 1
                    endIndex += 1
                    counter += 1
            curSentence = None
            try:
                curSentence = self.input_reader.get_next_sentence()
            except exp.lastElementWarning:
                # Raised by the reader when there is no further sentence.
                break