def generate_vector(self, sentence_id=None):
    """Vectorize one sentence and return a ``{token_id: vector}`` map.

    Temporarily repositions ``self.file_reader`` on ``sentence_id``, builds one
    concatenated vector per token from every configured vector reader (in
    sorted key order), then restores the reader's original position.

    :param sentence_id: identifier understood by ``file_reader.set_current_sentence``.
    :returns: dict mapping token ID -> concatenated numpy vector.
    :raises exp.noneValueError: if the configuration is all-None or the sentence is None.
    :raises TypeError: on malformed configuration or sentence contents.
    """
    curSentID = self.file_reader.get_current_sentence_id()
    config_values = list(self.vector_configuration.values())
    if all(e is None for e in config_values):
        raise exp.noneValueError(
            'All values of vector configuration is "None"')
    if any(e is not None and not isinstance(e, base.vectorReader)
           for e in config_values):
        raise TypeError(
            'Invalid vector configuration value found.\nFound: {}'.format(
                self.vector_configuration))
    # Deterministic ordering of the active (non-None) configuration keys.
    vectorKeys = sorted(
        k for k, v in self.vector_configuration.items() if v is not None)
    self.file_reader.set_current_sentence(sentence_id)
    curSentence = self.file_reader.get_current_sentence()
    if curSentence is None:
        raise exp.noneValueError('Sentence cannot be "None"')
    if not isinstance(curSentence, list):
        raise TypeError('Sentence must be a list.\nFound: {}'.format(
            type(curSentence)))
    if not all(isinstance(e, base.annotatedString) for e in curSentence):
        # BUG FIX: the original passed the type list as a second TypeError
        # argument instead of formatting it into the message.
        raise TypeError(
            'Sentence must be a list of "annotatedString".\nFound: {}'.format(
                [type(e) for e in curSentence]))
    curSentenceMap = {}
    for tok in curSentence:
        vectors = [
            self.vector_configuration[key].get_vector(tok.getValue(key))
            for key in vectorKeys
        ]
        curSentenceMap[tok.getValue(utils.TID)] = npcat(vectors)
    if self.vector_dimension is None and curSentenceMap:
        # BUG FIX: the original read curSentenceMap[1], assuming token IDs
        # start at 1 and the sentence is non-empty; any token's vector has
        # the same concatenated length.
        self.vector_dimension = len(next(iter(curSentenceMap.values())))
    # Restore the reader position so this lookup is side-effect free.
    self.file_reader.set_current_sentence(curSentID)
    return curSentenceMap
def vectorize(self):
    """Vectorize every sentence in the file into ``self.sentence_map``.

    Resets ``self.file_reader`` and walks all sentences; for each one stores
    ``{token_id: concatenated_vector}`` under its sentence ID. Iteration ends
    when the reader signals ``exp.lastElementWarning``.

    :raises exp.noneValueError: if the configuration is all-None or a sentence is None.
    :raises TypeError: on malformed configuration or sentence contents.
    """
    self.file_reader.reset()
    config_values = list(self.vector_configuration.values())
    if all(e is None for e in config_values):
        raise exp.noneValueError(
            'All values of vector configuration is "None"')
    if any(e is not None and not isinstance(e, base.vectorReader)
           for e in config_values):
        raise TypeError(
            'Invalid vector configuration value found.\nFound: {}'.format(
                self.vector_configuration))
    # Deterministic ordering of the active (non-None) configuration keys.
    vectorKeys = sorted(
        k for k, v in self.vector_configuration.items() if v is not None)
    curSentence = self.file_reader.get_current_sentence()
    while True:
        if curSentence is None:
            raise exp.noneValueError('Sentence cannot be "None"')
        if not isinstance(curSentence, list):
            raise TypeError('Sentence must be a list')
        if not all(isinstance(e, base.annotatedString) for e in curSentence):
            raise TypeError('Sentence must be a list of "annotatedString"')
        curSentenceMap = {}
        for tok in curSentence:
            vectors = [
                self.vector_configuration[key].get_vector(tok.getValue(key))
                for key in vectorKeys
            ]
            curSentenceMap[tok.getValue(utils.TID)] = npcat(vectors)
        self.sentence_map[
            self.file_reader.get_current_sentence_id()] = curSentenceMap
        try:
            curSentence = self.file_reader.get_next_sentence()
        except exp.lastElementWarning:
            # Reader exhausted: all sentences processed.
            break
def get_vector(self, key=None):
    """Build a concatenated one-hot vector for a ``{class: value}`` mapping.

    For every class in ``self.classes`` (in order) a zero block of
    ``self.dimension[class]`` is emitted; if the class appears in ``key``,
    the position of its value within ``self.elements[class]`` is set to 1.0.

    :param key: dict mapping class name -> element value.
    :returns: 1-D numpy vector (concatenation of all class blocks).
    :raises exp.noneValueError: if ``key`` is None.
    :raises TypeError: if ``key`` is not a dict.
    :raises KeyError: on an unknown class or unknown value.
    """
    if key is None:
        raise exp.noneValueError('Vector search key cannot be "None"')
    if not isinstance(key, dict):
        raise TypeError(
            'Vector search key must be a dict object.\nFound: <{}>'.format(
                type(key)))
    # Validate every class/value pair before building anything.
    for k, v in key.items():
        if k not in self.classes:
            raise KeyError(
                'Class doesnot exist in the vocabulary.\nFound: {}'.format(k))
        if v not in self.elements.get(k):
            raise KeyError(
                'A value for the class::{} doesnot exist.\nFound: {}'.format(
                    k, v))
    vectorList = []
    for c in self.classes:
        vectorPart = npzeros(self.dimension.get(c))
        if c in key:
            vectorPart[self.elements.get(c).index(key.get(c))] = 1.0
        vectorList.append(vectorPart)
    return npcat(vectorList)
def update_cell(self, coords, val):
    """Set the cell of ``self.m`` at ``coords`` (row, col) to ``val``.

    BUG FIX: the original mutated ``row.A1`` — a flattened array that is only
    a *view* of the row when the row slice happens to be contiguous — and then
    rebuilt the whole matrix with a concatenation. Direct indexed assignment
    is always correct, in-place, and O(1).
    """
    self.m[coords[0], coords[1]] = val
def populate_spiral(self):
    """Wrap ``self.m`` in a one-cell border of zeros (grow by one ring).

    Same border order as before: top row, right column, left column,
    bottom row — built from integer lists instead of parsed strings.
    """
    row_count, col_count = self.m.shape
    top = matrix([[0] * col_count])
    bordered = npcat((top, self.m), axis=0)
    side = matrix([[0]] * (row_count + 1))
    bordered = npcat((bordered, side), axis=1)
    bordered = npcat((side, bordered), axis=1)
    bottom = matrix([[0] * (col_count + 2)])
    self.m = npcat((bordered, bottom), axis=0)
def get_null_vector(self):
    """Return the all-zeros vector spanning every class dimension, in class order."""
    sizes = map(self.dimension.get, self.classes)
    return npcat([npzeros(size) for size in sizes])
def __populate_data_metrix(self):
    """Build the sliding-window training matrices for every sentence.

    For each sentence read from ``self.input_reader``:
      * inputs  — per-token embedding vectors (from ``self.vector_reader``)
        are padded with null vectors and concatenated into windows of
        ``self.window_width``, stored in
        ``self.input_data_matrix[(sentence_id, window_index)]``;
      * outputs — each window token's dependency relation vector is placed
        into a slot map encoding where its head lies relative to the window,
        stored in ``self.output_data_matrix[(sentence_id, window_index)]``.
    """
    self.input_reader.reset()
    curSentence = self.input_reader.get_current_sentence()
    while True:
        # Inert debug trace left from development (a bare string literal).
        """
        print '>>SENT[', self.input_reader.get_current_sentence_id(), ']', len(curSentence)
        print curSentence
        print
        """
        # input data generation
        sentID = self.input_reader.get_current_sentence_id()
        # Order tokens by token ID so list index follows sentence position.
        curSentence.sort(key=lambda x: x.getValue(utils.TID))
        # input generation
        vectorMap = self.vector_reader.generate_vector(sentID)
        inputVectors = [vectorMap.get(k) for k in sorted(vectorMap.keys())]
        #inputVectors = [self.vector_reader.get_vector(self.input_reader.get_current_sentence_id(), t.getValue(utils.TID)) for t in curSentence]
        # Pad both ends with (window_width - 1) null vectors so every token
        # appears in a full-width window.
        for i in range(self.window_width - 1):
            inputVectors[:0] = [self.vector_reader.get_null_vector()]
            inputVectors.append(self.vector_reader.get_null_vector())
        startIndex = 0
        endIndex = self.window_width
        counter = 0
        while True:
            #print [inputVectors[i] for i in range(startIndex, endIndex)]
            # One data point = the window's vectors concatenated flat.
            dataPoint = npcat(
                [inputVectors[i] for i in range(startIndex, endIndex)])
            self.input_data_matrix[(sentID, counter)] = dataPoint
            if endIndex == len(inputVectors):
                break
            else:
                startIndex += 1
                endIndex += 1
                counter += 1
        # Output generation
        # Pad the token list the same way the input vectors were padded, so
        # both padded sequences have identical length.
        for i in range(self.window_width - 1):
            curSentence[:0] = [utils.NULL]
            curSentence.append(utils.NULL)
        startIndex = 0
        endIndex = self.window_width
        counter = 0
        while True:
            dataPoint = []
            for e in curSentence[startIndex:endIndex]:
                if e == utils.NULL:
                    # Padding token: all slots stay null.
                    dataPoint.append(
                        [self.relation_reader.get_null_vector()] *
                        (self.window_width + 3))
                else:
                    hid = e.getValue(utils.RELATION_HEAD)
                    # rmap has window_width + 3 slots; the relation vector is
                    # written to exactly one of them:
                    #   [0]  hid == 0 (presumably the sentence root)
                    #   [1]  head lies left of (before) the window
                    #   [2]  head lies right of (after) the window
                    #   [3+] head inside the window at offset hIndex - startIndex
                    rmap = [self.relation_reader.get_null_vector()
                            ] * (self.window_width + 3)
                    rel = self.relation_reader.get_vector(
                        key=e.getValue(utils.RELATION))
                    if hid == 0:
                        rmap[0] = rel
                        dataPoint.append(rmap)
                    else:
                        # Position of the head token in the padded sentence.
                        hIndex = next(
                            (j for j, item in enumerate(curSentence)
                             if item != utils.NULL
                             and item.getValue(utils.TID) == hid), -1)
                        if hIndex == -1:
                            raise KeyError('Invalid token ID found')
                        elif hIndex < startIndex:
                            rmap[1] = rel
                            dataPoint.append(rmap)
                        elif hIndex >= endIndex:
                            rmap[2] = rel
                            dataPoint.append(rmap)
                        else:
                            rmap[3 + hIndex - startIndex] = rel
                            dataPoint.append(rmap)
            # Flatten each token's slot map, then the whole window, into one row.
            self.output_data_matrix[(sentID, counter)] = npcat(
                [npcat(e) for e in dataPoint])
            # NOTE(review): bound reuses len(inputVectors); it matches because
            # curSentence was padded to the same length above — confirm.
            if endIndex == len(inputVectors):
                break
            else:
                startIndex += 1
                endIndex += 1
                counter += 1
        curSentence = None
        try:
            curSentence = self.input_reader.get_next_sentence()
        except exp.lastElementWarning:
            # Reader exhausted: all sentences processed.
            break