def instance(self, row, diags, procs, gt):
    """Build one fixed-size feature vector for a single case.

    The vector is the concatenation of the unit-normalised sum of all
    diagnosis token vectors and the unit-normalised sum of all procedure
    token vectors, zero-padded to ``self.vector_size``, with the converted
    demographic variables written into the slots after the word2vec part.
    Codes matching the configured ``self.code_type`` are collected into
    ``excludes``.

    Returns a list ``[features, gt, excludes]``.
    """
    half_dims = int(self.word2vec_dims / 2)
    excludes = []

    # Sum every diagnosis token vector (first vector is the code token),
    # then normalise the accumulated vector to unit length.
    diag_sum = np.zeros(half_dims, dtype=np.float32)
    for diag in diags:
        if self.code_type in ['pdx', 'sdx']:
            excludes.append(diag)
        for vec in self.vectors_by_code['ICD_' + diag]:
            diag_sum += vec
    diag_sum = unitvec(diag_sum)

    # Same accumulation for the procedure token vectors.
    proc_sum = np.zeros(half_dims, dtype=np.float32)
    for proc in procs:
        if self.code_type == 'srg':
            excludes.append(proc)
        for vec in self.vectors_by_code['CHOP_' + proc]:
            proc_sum += vec
    proc_sum = unitvec(proc_sum)

    # Concatenate both halves and zero-pad in place up to the full
    # feature size (ndarray.resize pads with zeros, unlike np.resize).
    features = np.append(diag_sum, proc_sum)
    features.resize(self.vector_size)

    # Demographic variables occupy the slots after the word2vec block.
    for idx, var in enumerate(self.demo_variables_to_use):
        features[self.word2vec_dims + idx] = self.convert_demographic_variable(row, var)
    return [features, gt, excludes]
def instance(self, row, diags, procs, gt):
    """Build a sequence of per-code feature vectors for a single case.

    The sequence starts with a demographics vector (values placed after
    the word2vec slots), followed by one unit-normalised, zero-padded
    vector per diagnosis code and per procedure code. The demographics
    vector is also appended to ``self.demo_vars`` as a side effect.
    Codes matching the configured ``self.code_type`` are collected into
    ``excludes``.

    Returns a list ``[sequence, gt, excludes]``.
    """
    excludes = []
    sequence = []

    # First sequence element: converted demographic variables.
    demographic = np.zeros(self.vector_size, dtype=np.float32)
    for idx, var in enumerate(self.demo_variables_to_use):
        demographic[self.word2vec_dims + idx] = self.convert_demographic_variable(row, var)
    sequence.append(demographic)
    self.demo_vars.append(demographic)

    # One normalised vector per diagnosis code (sum of its token vectors).
    for diag in diags:
        if self.code_type in ['pdx', 'sdx']:
            excludes.append(diag)
        summed = np.zeros(self.word2vec_dims, dtype=np.float32)
        for vec in self.vectors_by_code['ICD_' + diag]:
            summed += vec
        summed = unitvec(summed)
        # In-place resize zero-pads up to the full feature size.
        summed.resize(self.vector_size)
        sequence.append(summed)

    # One normalised vector per procedure code, built the same way.
    for proc in procs:
        if self.code_type == 'srg':
            excludes.append(proc)
        summed = np.zeros(self.word2vec_dims, dtype=np.float32)
        for vec in self.vectors_by_code['CHOP_' + proc]:
            summed += vec
        summed = unitvec(summed)
        summed.resize(self.vector_size)
        sequence.append(summed)

    return [sequence, gt, excludes]
def instance(self, row, diags, procs, gt):
    """Build a sequence of per-code feature vectors for a single case.

    The sequence starts with a demographics vector (values placed after
    the word2vec slots), followed by one unit-normalised, zero-padded
    vector per diagnosis code and per procedure code. The demographics
    vector is also appended to ``self.demo_vars`` as a side effect.
    Codes matching the configured ``self.code_type`` are collected into
    ``excludes``.

    Returns a list ``[sequence, gt, excludes]``.
    """
    excludes = []
    sequence = []

    # First sequence element: converted demographic variables.
    demographic = np.zeros(self.vector_size, dtype=np.float32)
    for idx, var in enumerate(self.demo_variables_to_use):
        demographic[self.word2vec_dims + idx] = self.convert_demographic_variable(row, var)
    sequence.append(demographic)
    self.demo_vars.append(demographic)

    # One normalised vector per diagnosis code (sum of its token vectors).
    for diag in diags:
        if self.code_type in ["pdx", "sdx"]:
            excludes.append(diag)
        summed = np.zeros(self.word2vec_dims, dtype=np.float32)
        for vec in self.vectors_by_code["ICD_" + diag]:
            summed += vec
        summed = unitvec(summed)
        # In-place resize zero-pads up to the full feature size.
        summed.resize(self.vector_size)
        sequence.append(summed)

    # One normalised vector per procedure code, built the same way.
    for proc in procs:
        if self.code_type == "srg":
            excludes.append(proc)
        summed = np.zeros(self.word2vec_dims, dtype=np.float32)
        for vec in self.vectors_by_code["CHOP_" + proc]:
            summed += vec
        summed = unitvec(summed)
        summed.resize(self.vector_size)
        sequence.append(summed)

    return [sequence, gt, excludes]
if(code.startswith(code_type)): code_vocab.append(code) vector_size = vectors_by_codes[code_vocab[0]][0].shape[0] print("Vector size is " + str(vector_size)) average_vector_by_code = np.zeros((len(code_vocab), vector_size), dtype=np.float32) for i, code in enumerate(code_vocab): vectors = vectors_by_codes[code] data = np.zeros(vector_size, dtype=np.float32) # sum over all vectors (first vector is the code token) for v in vectors: data += v data = unitvec(data) average_vector_by_code[i] = data tokenizer = GermanTokenizer() load_time = time.clock() - start print('Loading vectors took ' + str(load_time) + ' seconds') start = time.clock() print("Search..") tokens = tokenizer.tokenize(phrase) print(tokens) average_phrase = np.zeros(vector_size, dtype=np.float32) for token in tokens: if token in vector_by_token.keys():
code_vocab.append(code) vector_size = vectors_by_codes[code_vocab[0]][0].shape[0] print("Vector size is " + str(vector_size)) average_vector_by_code = np.zeros((len(code_vocab), vector_size), dtype=np.float32) for i, code in enumerate(code_vocab): vectors = vectors_by_codes[code] data = np.zeros(vector_size, dtype=np.float32) # sum over all vectors (first vector is the code token) for v in vectors: data += v data = unitvec(data) average_vector_by_code[i] = data tokenizer = GermanTokenizer() load_time = time.clock() - start print('Loading vectors took ' + str(load_time) + ' seconds') start = time.clock() print("Search..") tokens = tokenizer.tokenize(phrase) print(tokens) average_phrase = np.zeros(vector_size, dtype=np.float32) for token in tokens: if token in vector_by_token.keys():