Esempio n. 1
0
    def forward(self, seq_dignosis_codes, seq_time_step, batch_labels, options,
                maxlen):
        """Encode a batch, pool it with self-attention and classify it.

        Args:
            seq_dignosis_codes: Batch of code sequences; ``len()`` of each
                element is used as that sample's length and the batch is
                padded by ``units.pad_matrix_new``.
            seq_time_step: Per-sample time information, padded by
                ``units.pad_time``.
            batch_labels: Labels for the batch, forwarded to
                ``units.pad_matrix_new``.
            options: Mapping of run options. ``options['use_gpu']`` selects
                the device for every tensor created here; the mapping is also
                passed to the ``units`` padding helpers.
            maxlen: Not used by this method; kept for interface compatibility.

        Returns:
            A tuple ``(predictions, labels, self_weight)``: the output of
            ``self.classify_layer``, the labels as a ``LongTensor`` on the
            selected device, and the un-normalised weights returned by
            ``self.get_self_attention``.
        """
        # Resolve the target device once so that *every* tensor (including
        # ``lengths``) follows ``options['use_gpu']`` and the CPU path never
        # touches CUDA.
        device = torch.device('cuda' if options['use_gpu'] else 'cpu')

        seq_time_step = np.array(list(units.pad_time(seq_time_step, options)))
        lengths = torch.from_numpy(
            np.array([len(seq) for seq in seq_dignosis_codes])).to(device)
        diagnosis_codes, labels, mask, mask_final, mask_code = units.pad_matrix_new(
            seq_dignosis_codes, batch_labels, options)

        diagnosis_codes = torch.LongTensor(diagnosis_codes).to(device)
        # ``1 - mask`` inverts the mask returned by the padding helper
        # (presumably so that True marks padded positions -- TODO confirm
        # against units.pad_matrix_new).
        mask_mult = torch.BoolTensor(1 - mask).unsqueeze(2).to(device)
        mask_final = torch.Tensor(mask_final).unsqueeze(2).to(device)
        mask_code = torch.Tensor(mask_code).unsqueeze(3).to(device)

        features = self.feature_encoder(diagnosis_codes, mask_mult, mask_code,
                                        seq_time_step, lengths)

        # Keep only the positions selected by ``mask_final`` and collapse
        # dim 1, retaining it as a singleton so the result can act as a query.
        final_states = (features * mask_final).sum(1, keepdim=True)
        queries = self.relu(self.quiry_layer(final_states))

        self_weight = self.get_self_attention(features, queries, mask_mult)
        # Normalise the weights along dim 1; the epsilon guards against a
        # zero denominator.
        total_weight = self_weight / (
            torch.sum(self_weight, 1, keepdim=True) + 1e-5)
        averaged_features = torch.sum(features * total_weight, 1)
        averaged_features = self.dropout(averaged_features)
        predictions = self.classify_layer(averaged_features)

        labels = torch.LongTensor(labels).to(device)
        return predictions, labels, self_weight
Esempio n. 2
0
    def forward(self, seq_dignosis_codes, seq_time_step, batch_labels, options,
                maxlen):
        """Encode a batch and classify it from the ``mask_final``-selected state.

        Args:
            seq_dignosis_codes: Batch of code sequences; ``len()`` of each
                element is used as that sample's length and the batch is
                padded by ``units.pad_matrix_new``.
            seq_time_step: Per-sample time information, padded by
                ``units.pad_time``.
            batch_labels: Labels for the batch, forwarded to
                ``units.pad_matrix_new``.
            options: Mapping of run options. ``options['use_gpu']`` selects
                the device for every tensor created here; the mapping is also
                passed to the ``units`` padding helpers.
            maxlen: Not used by this method; kept for interface compatibility.

        Returns:
            A tuple ``(predictions, labels, None)``: the output of
            ``self.classify_layer``, the labels as a ``LongTensor`` on the
            selected device, and ``None`` in the third slot (this variant
            computes no attention weights).
        """
        # Resolve the target device once so that *every* tensor (including
        # ``lengths``) follows ``options['use_gpu']`` and the CPU path never
        # touches CUDA.
        device = torch.device('cuda' if options['use_gpu'] else 'cpu')

        seq_time_step = np.array(list(units.pad_time(seq_time_step, options)))
        lengths = torch.from_numpy(
            np.array([len(seq) for seq in seq_dignosis_codes])).to(device)
        diagnosis_codes, labels, mask, mask_final, mask_code = units.pad_matrix_new(
            seq_dignosis_codes, batch_labels, options)

        diagnosis_codes = torch.LongTensor(diagnosis_codes).to(device)
        # ``1 - mask`` inverts the mask returned by the padding helper
        # (presumably so that True marks padded positions -- TODO confirm
        # against units.pad_matrix_new).
        mask_mult = torch.BoolTensor(1 - mask).unsqueeze(2).to(device)
        mask_final = torch.Tensor(mask_final).unsqueeze(2).to(device)
        mask_code = torch.Tensor(mask_code).unsqueeze(3).to(device)

        features = self.feature_encoder(diagnosis_codes, mask_mult, mask_code,
                                        seq_time_step, lengths)

        # Keep only the positions selected by ``mask_final`` and collapse
        # dim 1 to obtain one vector per sample.
        final_states = (features * mask_final).sum(1)
        predictions = self.classify_layer(final_states)

        labels = torch.LongTensor(labels).to(device)
        return predictions, labels, None
def load_hita_input(input_jsonl_path, max_seq_len, *,
                    code2idx_path="./data/hfdata/hf_code2idx_new.pickle"):
    """Load a JSONL dataset and turn it into padded tensors.

    Each non-blank line of ``input_jsonl_path`` must be a JSON object with
    the keys ``id``, ``medical_records.record_icd`` (a list of lists of
    codes), ``medical_records.time_distance`` and ``heart_diseases.hf_label``.

    Args:
        input_jsonl_path: Path of the UTF-8 JSONL file to read.
        max_seq_len: Sequence-length limit handed to ``units.adjust_input``;
            padding is done to ``max_seq_len + 1``.
        code2idx_path: Path of the pickled code -> index mapping. Defaults to
            the location the project ships with.

    Returns:
        A tuple ``(qids, labels, diagnosis_codes, seq_time_step, mask_mult,
        mask_final, mask_code, lengths)``. ``qids`` is a list of the record
        ids and ``labels`` a ``torch.long`` tensor; every other tensor has an
        extra singleton dimension inserted at position 1.

    Raises:
        KeyError: If a record lacks an expected key or contains a code that
            is absent from the mapping.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling, so
    # ``code2idx_path`` must only ever point at a trusted file.
    with open(code2idx_path, "rb") as fin:
        code2id = pickle.load(fin)
    n_diagnosis_codes = len(code2id)

    qids = []
    labels = []
    diagnosis_codes = []
    time_step = []
    with open(input_jsonl_path, "r", encoding="utf-8") as fin:
        for line in fin:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            input_json = json.loads(line)
            qids.append(input_json['id'])
            record_icd = input_json["medical_records"]["record_icd"]
            time_dis = input_json["medical_records"]["time_distance"]
            label = input_json["heart_diseases"]["hf_label"]
            # Map every raw code of every inner list to its integer index.
            diagnosis_codes.append(
                [[code2id[code] for code in visit] for visit in record_icd])
            time_step.append(time_dis)
            labels.append(label)

    padded_len = max_seq_len + 1
    diagnosis_codes, time_step = units.adjust_input(diagnosis_codes, time_step,
                                                    max_seq_len,
                                                    n_diagnosis_codes)
    # Every sample is reported with the same (padded) length.
    lengths = torch.from_numpy(
        np.array([padded_len for _ in diagnosis_codes]))
    seq_time_step = np.array(list(units.pad_time(time_step, padded_len)))
    diagnosis_codes, mask, mask_final, mask_code = units.pad_matrix_new(
        diagnosis_codes, n_diagnosis_codes, padded_len)

    diagnosis_codes = torch.LongTensor(diagnosis_codes)
    # ``1 - mask`` inverts the mask returned by the padding helper. The
    # ByteTensor dtype is kept because it is part of what callers receive.
    mask_mult = torch.ByteTensor(1 - mask).unsqueeze(2)
    mask_final = torch.Tensor(mask_final).unsqueeze(2)
    mask_code = torch.Tensor(mask_code).unsqueeze(3)
    # Scale the time values by a fixed factor of 180
    # (NOTE(review): the unit behind this constant is not visible here).
    seq_time_step = torch.Tensor(seq_time_step).unsqueeze(2) / 180
    labels = torch.tensor(labels, dtype=torch.long)

    # Insert a singleton dimension at position 1 of every feature tensor.
    diagnosis_codes = diagnosis_codes.unsqueeze(1)
    seq_time_step = seq_time_step.unsqueeze(1)
    mask_mult = mask_mult.unsqueeze(1)
    mask_final = mask_final.unsqueeze(1)
    mask_code = mask_code.unsqueeze(1)
    lengths = lengths.unsqueeze(1)
    return qids, labels, diagnosis_codes, seq_time_step, mask_mult, mask_final, mask_code, lengths