예제 #1
0
def create_y(data):
    """Return the flat list of class labels, one per candidate pair.

    Pairs are visited document by document, sentence by sentence, in the
    order they appear in ``data``. A pair whose ``ddi`` attribute equals the
    string ``"true"`` is labelled ``class_index[pair.type]``; every other
    pair gets ``class_index["null"]``.
    """
    # Lazily flatten documents -> sentences -> pairs, preserving order.
    candidates = (
        candidate
        for document in data
        for sent in document.sentences
        for candidate in sent.pairs
    )

    labels = []
    for candidate in candidates:
        fallback = class_index["null"]
        labels.append(
            class_index[candidate.type] if candidate.ddi == "true" else fallback
        )
    return labels


### SPLIT DATASET ###

data = read_dataset()
n_docs = len(data)

# Fixed seed so the random train/test split is the same on every run.
np.random.seed(42)

# Sample 70% of the document indices (without replacement) for training;
# every index that was not sampled goes to the test set.
train_amount = 0.7
train_ids = choice(n_docs, int(train_amount * n_docs), replace=False)

# Testing membership directly against the sampled index sequence scans it on
# every lookup; a set of plain ints makes building the complement linear.
train_id_set = {int(i) for i in train_ids}
test_ids = [i for i in range(n_docs) if i not in train_id_set]

training = [data[i] for i in train_ids]
test = [data[i] for i in test_ids]

print("%i training documents" % len(training))
print("%i test documents" % len(test))

### TRAINING ###
예제 #2
0
    #features.append(("drug1_type", drug1_type))
    #features.append(("drug2_type", drug2_type))
    #drug1_name = pair.e1.text
    #drug2_name = pair.e2.text
    #features.append(("drug_name", drug1_name))
    #features.append(("drug_name", drug2_name))
    
    same_drug = pair.e1.text.lower() == pair.e2.text.lower()
    features.append(("same_drug", same_drug))
    
    return features

if __name__ == "__main__":
    # Script entry point: load the dataset and set up cross-validation state.
    # Fixed seed so that any np.random-based steps are repeatable across runs.
    np.random.seed(42)
    
    data = read_dataset()
    n_docs = len(data)
    
    # Split the n_docs documents into folds for cross-validation.
    n_folds = 10
    folds = k_folds(n_docs, n_folds)
    # NOTE(review): `folds` has already been built with n_folds=10 at this
    # point; the TEST override only lowers the fold *count* to 1. Presumably
    # this makes a later loop run a single fold of the 10-way split — confirm.
    if TEST:
        n_folds = 1
    
    # Class names used by this script.
    # NOTE(review): how they map to label indices is not visible here.
    classes = ["int", "effect", "none", "mechanism", "advise"]
    # Empty accumulators for cross-validation results; the code that fills
    # them is outside this view. Presumably one precision/recall/F entry is
    # appended per fold, with the "2class" variants tracking a binary
    # (interaction vs. no interaction) evaluation — TODO confirm.
    cv_results = {}
    cv_precisions = []
    cv_recalls = []
    cv_fs = []
    cv_2class_precisions = []
    cv_2class_recalls = []
    cv_2class_fs = []