コード例 #1
0
 def instance(self, row, diags, procs, gt):
     """Build a single flat feature vector for one case.

     The first half of the word2vec section holds the summed ICD vectors
     of all diagnoses and the second half the summed CHOP vectors of all
     procedures, each passed through ``unitvec``. The array is then
     resized to ``self.vector_size`` and the demographic values are
     written starting at index ``self.word2vec_dims``.

     Args:
         row: Record handed to ``convert_demographic_variable``.
         diags: Diagnosis codes; each is looked up as ``'ICD_' + code``.
         procs: Procedure codes; each is looked up as ``'CHOP_' + code``.
         gt: Value returned unchanged as the second list element
             (presumably the ground-truth label -- confirm with callers).

     Returns:
         ``[data, gt, excludes]`` where ``excludes`` lists the diagnoses
         when ``code_type`` is ``'pdx'``/``'sdx'`` and the procedures when
         it is ``'srg'``.
     """
     half_dims = int(self.word2vec_dims / 2)
     excludes = []

     # Diagnoses: sum over all vectors (first vector is the code token).
     track_diags = self.code_type in ['pdx', 'sdx']
     diag_sum = np.zeros(half_dims, dtype=np.float32)
     for diag in diags:
         if track_diags:
             excludes.append(diag)
         for vec in self.vectors_by_code['ICD_' + diag]:
             diag_sum += vec
     diag_part = unitvec(diag_sum)

     # Procedures: same accumulation, keyed by the CHOP prefix.
     track_procs = self.code_type == 'srg'
     proc_sum = np.zeros(half_dims, dtype=np.float32)
     for proc in procs:
         if track_procs:
             excludes.append(proc)
         for vec in self.vectors_by_code['CHOP_' + proc]:
             proc_sum += vec
     proc_part = unitvec(proc_sum)

     # Concatenate both halves, then grow (zero-filled) or shrink in place
     # to the full feature length.
     features = np.append(diag_part, proc_part)
     features.resize(self.vector_size)

     # Demographic values live directly after the word2vec section.
     demo_start = self.word2vec_dims
     for i, var in enumerate(self.demo_variables_to_use):
         features[demo_start + i] = self.convert_demographic_variable(row, var)

     return [features, gt, excludes]
コード例 #2
0
    def instance(self, row, diags, procs, gt):
        """Build a sequence of feature vectors for one case.

        The sequence starts with a demographic vector (zeros except for the
        slots from ``self.word2vec_dims`` onwards), followed by one vector
        per diagnosis and then one per procedure. Every vector has length
        ``self.vector_size``. The demographic vector is also appended to
        ``self.demo_vars``.

        Args:
            row: Record handed to ``convert_demographic_variable``.
            diags: Diagnosis codes; each is looked up as ``'ICD_' + code``.
            procs: Procedure codes; each is looked up as ``'CHOP_' + code``.
            gt: Value returned unchanged as the second list element
                (presumably the ground-truth label -- confirm with callers).

        Returns:
            ``[sequence, gt, excludes]`` where ``excludes`` lists the
            diagnoses when ``code_type`` is ``'pdx'``/``'sdx'`` and the
            procedures when it is ``'srg'``.
        """
        def embed(key):
            # Sum all vectors stored under ``key``, pass the sum through
            # unitvec, then resize in place to the full feature length.
            acc = np.zeros(self.word2vec_dims, dtype=np.float32)
            for vec in self.vectors_by_code[key]:
                acc += vec
            acc = unitvec(acc)
            acc.resize(self.vector_size)
            return acc

        # Leading element: demographics only, word2vec section left at zero.
        demo_vec = np.zeros(self.vector_size, dtype=np.float32)
        for offset, var in enumerate(self.demo_variables_to_use):
            value = self.convert_demographic_variable(row, var)
            demo_vec[self.word2vec_dims + offset] = value
        sequence = [demo_vec]
        self.demo_vars.append(demo_vec)

        excludes = []
        for diag in diags:
            if self.code_type in ['pdx', 'sdx']:
                excludes.append(diag)
            sequence.append(embed('ICD_' + diag))

        for proc in procs:
            if self.code_type == 'srg':
                excludes.append(proc)
            sequence.append(embed('CHOP_' + proc))

        return [sequence, gt, excludes]
コード例 #3
0
    def instance(self, row, diags, procs, gt):
        """Turn one case into a list of equally sized feature vectors.

        Element 0 is a demographic vector whose entries from index
        ``self.word2vec_dims`` onwards carry the converted demographic
        variables; it is also recorded in ``self.demo_vars``. It is followed
        by one vector per diagnosis code and then one per procedure code,
        each resized to ``self.vector_size``.

        Args:
            row: Record handed to ``convert_demographic_variable``.
            diags: Diagnosis codes; each is looked up as ``"ICD_" + code``.
            procs: Procedure codes; each is looked up as ``"CHOP_" + code``.
            gt: Value returned unchanged as the second list element
                (presumably the ground-truth label -- confirm with callers).

        Returns:
            ``[sequence, gt, excludes]`` where ``excludes`` lists the
            diagnoses when ``code_type`` is ``"pdx"``/``"sdx"`` and the
            procedures when it is ``"srg"``.
        """
        demo_vec = np.zeros(self.vector_size, dtype=np.float32)
        demo_start = self.word2vec_dims
        for i, var in enumerate(self.demo_variables_to_use):
            demo_vec[demo_start + i] = self.convert_demographic_variable(row, var)
        sequence = [demo_vec]
        self.demo_vars.append(demo_vec)

        # (key prefix, codes, whether these codes are reported in excludes);
        # diagnoses are processed before procedures.
        code_groups = (
            ("ICD_", diags, self.code_type in ["pdx", "sdx"]),
            ("CHOP_", procs, self.code_type == "srg"),
        )

        excludes = []
        for prefix, codes, is_excluded in code_groups:
            for code in codes:
                if is_excluded:
                    excludes.append(code)
                # Sum every vector stored for this code, pass the sum
                # through unitvec, then resize in place to the full length.
                summed = np.zeros(self.word2vec_dims, dtype=np.float32)
                for vec in self.vectors_by_code[prefix + code]:
                    summed += vec
                summed = unitvec(summed)
                summed.resize(self.vector_size)
                sequence.append(summed)

        return [sequence, gt, excludes]
コード例 #4
0
ファイル: search_codes.py プロジェクト: eonum/medcodelearn
     if(code.startswith(code_type)):
         code_vocab.append(code)
 
 # Embedding dimensionality, read off the first vector of the first code
 # in the vocabulary.
 vector_size = vectors_by_codes[code_vocab[0]][0].shape[0]
 
 print("Vector size is " + str(vector_size))
 
 # One row per entry of code_vocab; rows are filled in by the loop below.
 average_vector_by_code = np.zeros((len(code_vocab), vector_size), dtype=np.float32)
 
 for i, code in enumerate(code_vocab):
     vectors = vectors_by_codes[code]
     data = np.zeros(vector_size, dtype=np.float32)
     # sum over all vectors (first vector is the code token)
     for v in vectors:
         data += v
     # unitvec is imported outside this excerpt; presumably it rescales
     # the sum to unit length -- confirm.
     data = unitvec(data)
     average_vector_by_code[i] = data
     
 tokenizer = GermanTokenizer()
 
 # NOTE(review): time.clock() was deprecated in Python 3.3 and removed in
 # Python 3.8; time.perf_counter() is the documented replacement. It has to
 # be changed together with the initial `start` assignment, which lies
 # outside this excerpt.
 load_time = time.clock() - start
 print('Loading vectors took ' + str(load_time) + ' seconds')
 # Restart the timer for the search phase.
 start = time.clock()
 
 print("Search..")
 
 tokens = tokenizer.tokenize(phrase)
 print(tokens)
 # Zero-initialised vector for the phrase; presumably filled from the token
 # vectors in the loop that follows -- its body is cut off in this excerpt.
 average_phrase = np.zeros(vector_size, dtype=np.float32)
 for token in tokens:
     if token in vector_by_token.keys():
コード例 #5
0
            code_vocab.append(code)

    # Embedding dimensionality, read off the first vector of the first code
    # in the vocabulary.
    vector_size = vectors_by_codes[code_vocab[0]][0].shape[0]

    print("Vector size is " + str(vector_size))

    # One row per entry of code_vocab; rows are filled in by the loop below.
    average_vector_by_code = np.zeros((len(code_vocab), vector_size),
                                      dtype=np.float32)

    for i, code in enumerate(code_vocab):
        vectors = vectors_by_codes[code]
        data = np.zeros(vector_size, dtype=np.float32)
        # sum over all vectors (first vector is the code token)
        for v in vectors:
            data += v
        # unitvec is imported outside this excerpt; presumably it rescales
        # the sum to unit length -- confirm.
        data = unitvec(data)
        average_vector_by_code[i] = data

    tokenizer = GermanTokenizer()

    # NOTE(review): time.clock() was deprecated in Python 3.3 and removed in
    # Python 3.8; time.perf_counter() is the documented replacement. It has
    # to be changed together with the initial `start` assignment, which lies
    # outside this excerpt.
    load_time = time.clock() - start
    print('Loading vectors took ' + str(load_time) + ' seconds')
    # Restart the timer for the search phase.
    start = time.clock()

    print("Search..")

    tokens = tokenizer.tokenize(phrase)
    print(tokens)
    # Zero-initialised vector for the phrase; presumably filled from the
    # token vectors in the loop that follows -- its body is cut off in this
    # excerpt.
    average_phrase = np.zeros(vector_size, dtype=np.float32)
    for token in tokens:
        if token in vector_by_token.keys():