コード例 #1
0
ファイル: ml.py プロジェクト: emreg00/repurpose
def check_ml(data, n_run, knn, n_fold, n_proportion, n_subset, model_type, prediction_type, features, recalculate_similarity, disjoint_cv, split_both=False, output_file = None, model_fun = None, verbose=False, n_seed = None):
    """Run repeated cross-validation and summarize AUC / AUPRC over the runs.

    Arguments:
    data -- 9-tuple unpacked as (drugs, disease_to_index, drug_to_values,
        se_to_index, drug_to_values_se, drug_to_values_structure,
        drug_to_values_target, drug_interaction_to_index,
        drug_to_values_interaction)
    n_run -- number of times the balance / cross-validation cycle is repeated
    knn, model_type, recalculate_similarity, model_fun, verbose -- forwarded
        unchanged to check_ml_helper
    n_fold, n_proportion, n_subset, disjoint_cv, split_both -- forwarded to
        utilities.balance_data_and_get_cv and to check_ml_helper
    prediction_type -- "disease", "side effect" or "drug interaction"
    features -- container tested for "phenotype", "chemical" and "target";
        one similarity matrix is built per entry that is present
    output_file -- optional path, opened in append mode; a header line and a
        final "avg" row are written and the handle is passed to
        check_ml_helper
    n_seed -- optional seed for random and numpy.random, re-applied each run

    Returns a tuple of two strings: ("AUC: <mean>", "AUPRC: <mean>").

    Raises ValueError for an unknown prediction_type, for missing drug
    interaction data, or when features selects no similarity matrix.
    """
    (drugs, disease_to_index, drug_to_values, se_to_index, drug_to_values_se,
     drug_to_values_structure, drug_to_values_target,
     drug_interaction_to_index, drug_to_values_interaction) = data
    if prediction_type == "disease":
        # For drug repurposing (feature phenotype: side effect)
        disease_to_drugs, pairs, classes = utilities.get_drug_disease_mapping(drugs, drug_to_values, disease_to_index)
    elif prediction_type == "side effect":
        # For side effect prediction (feature phenotype: disease indication)
        disease_to_drugs, pairs, classes = utilities.get_drug_disease_mapping(drugs, drug_to_values_se, se_to_index)
        # Swap so that the "phenotype" feature below is built from indications.
        drug_to_values_se = drug_to_values
        se_to_index = disease_to_index
    elif prediction_type == "drug interaction":
        if drug_interaction_to_index is None:
            raise ValueError("Drug interaction information is missing!")
        # For drug interaction prediction (feature phenotype: side effect)
        disease_to_drugs, pairs, classes = utilities.get_drug_disease_mapping(drugs, drug_to_values_interaction, drug_interaction_to_index)
    else:
        raise ValueError("Uknown prediction_type: " + prediction_type)

    # One similarity matrix per requested feature type, in a fixed order.
    list_M_similarity = []
    drug_to_index = None
    if "phenotype" in features:
        drug_to_index, M_similarity_se = utilities.get_similarity(drugs, drug_to_values_se)
        list_M_similarity.append(M_similarity_se)
    if "chemical" in features:
        drug_to_index, M_similarity_chemical = utilities.get_similarity(drugs, drug_to_values_structure)
        list_M_similarity.append(M_similarity_chemical)
    if "target" in features:
        drug_to_index, M_similarity_target = utilities.get_similarity(drugs, drug_to_values_target)
        list_M_similarity.append(M_similarity_target)
    if not list_M_similarity:
        # Without this guard drug_to_index would be unbound further down.
        raise ValueError("No known feature type in features: %s" % (features,))

    output_f = open(output_file, 'a') if output_file is not None else None
    try:
        if output_f is not None:
            output_f.write("n_fold\tn_proportion\tn_subset\tmodel type\tprediction type\tfeatures\trecalculate\tdisjoint\tpairwise\tvariable\tauc.mean\tauc.sd\tauprc.mean\tauprc.sd\n")
        values = []
        values2 = []
        for i in range(n_run):
            if n_seed is not None:
                # NOTE: the offset accumulates across runs (seed, seed+1,
                # seed+3, ...); kept as-is so earlier results stay reproducible.
                n_seed += i
                random.seed(n_seed)
                numpy.random.seed(n_seed)
            pairs_, classes_, cv = utilities.balance_data_and_get_cv(pairs, classes, n_fold, n_proportion, n_subset, disjoint = disjoint_cv, split_both=split_both, n_seed = n_seed)
            val, val2 = check_ml_helper(drugs, disease_to_drugs, drug_to_index, list_M_similarity, pairs_, classes_, cv, knn, n_fold, n_proportion, n_subset, model_type, prediction_type, features, recalculate_similarity, disjoint_cv, split_both, output_f, model_fun, verbose, n_seed)
            values.append(val)
            values2.append(val2)
        auc_mean = numpy.mean(values)
        auc_sd = numpy.std(values)
        auprc_mean = numpy.mean(values2)
        # Single pre-formatted argument: prints the same under Python 2 and 3.
        print("AUC over runs: %.1f (+/-%.1f): %s" % (auc_mean, auc_sd, [round(x, 1) for x in values]))
        if output_f is not None:
            output_f.write("%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\n" % (n_fold, n_proportion, n_subset, model_type, prediction_type, "|".join(features), recalculate_similarity, disjoint_cv, split_both, "avg", auc_mean, auc_sd, auprc_mean, numpy.std(values2)))
    finally:
        # Close the report file even when a run raises.
        if output_f is not None:
            output_f.close()
    return "AUC: %.1f" % auc_mean, "AUPRC: %.1f" % auprc_mean
コード例 #2
0
ファイル: ml.py プロジェクト: philge/repurpose
def check_ml(data, n_run, knn, n_fold, n_proportion, n_subset, model_type, prediction_type, features, recalculate_similarity, disjoint_cv, output_file = None, model_fun = None, verbose=False, n_seed = None):
    """Run repeated cross-validation and summarize AUC / AUPRC over the runs.

    Arguments:
    data -- 7-tuple unpacked as (drugs, disease_to_index, drug_to_values,
        se_to_index, drug_to_values_se, drug_to_values_structure,
        drug_to_values_target)
    n_run -- number of times the balance / cross-validation cycle is repeated
    knn, model_type, recalculate_similarity, model_fun, verbose -- forwarded
        unchanged to check_ml_helper
    n_fold, n_proportion, n_subset, disjoint_cv -- forwarded to
        utilities.balance_data_and_get_cv and to check_ml_helper
    prediction_type -- "disease" or "side effect"
    features -- container tested for "phenotype", "chemical" and "target";
        one similarity matrix is built per entry that is present
    output_file -- optional path, opened in append mode; a header line and a
        final "avg" row are written and the handle is passed to
        check_ml_helper
    n_seed -- optional seed for random and numpy.random, re-applied each run

    Returns a tuple of two strings: ("AUC: <mean>", "AUPRC: <mean>").

    Raises ValueError for an unknown prediction_type or when features selects
    no similarity matrix.
    """
    (drugs, disease_to_index, drug_to_values, se_to_index, drug_to_values_se,
     drug_to_values_structure, drug_to_values_target) = data
    if prediction_type == "disease":
        disease_to_drugs, pairs, classes = utilities.get_drug_disease_mapping(drugs, drug_to_values, disease_to_index)
    elif prediction_type == "side effect":
        # For side effect prediction
        disease_to_drugs, pairs, classes = utilities.get_drug_disease_mapping(drugs, drug_to_values_se, se_to_index)
        # Swap so that the "phenotype" feature below is built from drug_to_values.
        drug_to_values_se = drug_to_values
        se_to_index = disease_to_index
    else:
        raise ValueError("Uknown prediction_type: " + prediction_type)

    # One similarity matrix per requested feature type, in a fixed order.
    list_M_similarity = []
    drug_to_index = None
    if "phenotype" in features:
        drug_to_index, M_similarity_se = utilities.get_similarity(drugs, drug_to_values_se)
        list_M_similarity.append(M_similarity_se)
    if "chemical" in features:
        drug_to_index, M_similarity_chemical = utilities.get_similarity(drugs, drug_to_values_structure)
        list_M_similarity.append(M_similarity_chemical)
    if "target" in features:
        drug_to_index, M_similarity_target = utilities.get_similarity(drugs, drug_to_values_target)
        list_M_similarity.append(M_similarity_target)
    if not list_M_similarity:
        # Without this guard drug_to_index would be unbound further down.
        raise ValueError("No known feature type in features: %s" % (features,))

    output_f = open(output_file, 'a') if output_file is not None else None
    try:
        if output_f is not None:
            output_f.write("n_fold\tn_proportion\tn_subset\tmodel type\tprediction type\tfeatures\trecalculate\tdisjoint\tvariable\tauc.mean\tauc.sd\tauprc.mean\tauprc.sd\n")
        values = []
        values2 = []
        for i in range(n_run):
            if n_seed is not None:
                # NOTE: the offset accumulates across runs (seed, seed+1,
                # seed+3, ...); kept as-is so earlier results stay reproducible.
                n_seed += i
                random.seed(n_seed)
                numpy.random.seed(n_seed)
            pairs_, classes_, cv = utilities.balance_data_and_get_cv(pairs, classes, n_fold, n_proportion, n_subset, disjoint = disjoint_cv, n_seed = n_seed)
            val, val2 = check_ml_helper(drugs, disease_to_drugs, drug_to_index, list_M_similarity, pairs_, classes_, cv, knn, n_fold, n_proportion, n_subset, model_type, prediction_type, features, recalculate_similarity, disjoint_cv, output_f, model_fun, verbose, n_seed)
            values.append(val)
            values2.append(val2)
        auc_mean = numpy.mean(values)
        auc_sd = numpy.std(values)
        auprc_mean = numpy.mean(values2)
        # Single pre-formatted argument: prints the same under Python 2 and 3.
        print("AUC over runs: %.1f (+/-%.1f): %s" % (auc_mean, auc_sd, [round(x, 1) for x in values]))
        if output_f is not None:
            output_f.write("%d\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%f\t%f\n" % (n_fold, n_proportion, n_subset, model_type, prediction_type, "|".join(features), recalculate_similarity, disjoint_cv, "avg", auc_mean, auc_sd, auprc_mean, numpy.std(values2)))
    finally:
        # Close the report file even when a run raises.
        if output_f is not None:
            output_f.close()
    return "AUC: %.1f" % auc_mean, "AUPRC: %.1f" % auprc_mean
コード例 #3
0
ファイル: tests.py プロジェクト: zeromtmu/repurpose
 def test_get_similarity_based_scores_knn2(self):
     """Score of c1 for p2 with two nearest neighbours should be 0.5."""
     # Known indications: c1 is used for p1, c2 is used for p2.
     disease_to_drugs = {"p1": set(["c1"]), "p2": set(["c2"])}
     # Pairwise drug similarity computed from the fixture's drug values.
     drug_to_index, similarity_matrix = get_similarity(self.drugs, self.drug_to_values)
     scores = get_similarity_based_scores(self.drugs, disease_to_drugs, drug_to_index, [similarity_matrix], 2)
     # Expected by the fixture: c1's similarity to c2, c3, c4 is 0.5, 0.5, -0.5,
     # so with knn=2 the score for p2 is 0.5*label(c2) + 0.5*label(c3)
     # = 0.5 * 1 + 0 = 0.5.
     c1_score_for_p2 = scores["c1"]["p2"][0]
     self.assertAlmostEqual(c1_score_for_p2, 0.5)
コード例 #4
0
 def test_get_similarity(self):
     """Check the similarity of c1 against c2, c3 and c4.

     The similarities are computed floating-point values, so they are
     compared with assertAlmostEqual rather than exact equality.
     """
     drug_to_index, M = get_similarity(self.drugs, self.drug_to_values)
     drug1 = "c1"
     expected = [
         ("c2", 0.5),   # cor([1,0,0], [1,1,0]) == 0.5
         ("c3", 0.5),   # cor([1,0,0], [1,0,1]) == 0.5
         ("c4", -0.5),  # cor([1,0,0], [0,0,1]) == -0.5
     ]
     for drug2, similarity in expected:
         self.assertAlmostEqual(M[drug_to_index[drug1]][drug_to_index[drug2]], similarity)
コード例 #5
0
    def match(self, otherchunk, partialmatching, mismatch_penalty=0):
        """
        Check whether this chunk matches otherchunk (given bound variables in boundvars).

        For every item x of self, the attribute named x[0] + "_" is read from
        otherchunk.actrchunk (None when the attribute is absent) and compared
        against the variables, negated variables, values and negated values
        that utilities.splitting extracts from x[1].

        Arguments:
        otherchunk -- object exposing an actrchunk attribute holding the slot values
        partialmatching -- if true, a mismatch does not abort the check; the
            result of utilities.get_similarity is added to a running total for
            each mismatch and that total is returned. If false, False is
            returned at the first mismatch and True when none is found.
        mismatch_penalty -- forwarded to utilities.get_similarity

        Side effect: self.boundvars may gain "=<var>" and "~=<var>" entries.

        NOTE(review): when self == otherchunk the method returns 0 even if
        partialmatching is false, which is falsy -- confirm that callers
        handle equality separately.
        """
        similarity = 0
        if self == otherchunk:
            return similarity
        # Below starts the check that self is a proper part of otherchunk.
        # __emptyvalue is ignored. Four cases have to be checked separately:
        # =x, ~=x, !1, ~!1. Variables and their values also have to be saved in
        # boundvars. When self is not part of otherchunk the loop adds to
        # (dis)similarity.
        for x in self:

            try:
                matching_val = getattr(otherchunk.actrchunk,
                                       x[0] + "_")  # get the value of attr
            except AttributeError:
                matching_val = None  # if it is missing, it must be None

            if isinstance(matching_val, utilities.VarvalClass):
                matching_val = matching_val.values  # the value might be wrapped in a VarvalClass; in that case, take its plain values
            varval = utilities.splitting(x[1])
            # Debugging aid, left disabled:
            # if otherchunk.typename == "IDENTIFY_LINKS":
            #     print("xxx:",x)
            #     print(matching_val)
            #     print(varval.values)
            # print(type(varval.values))

            # checking variables, e.g., =x
            if varval.variables:
                # if matching_val == self.__emptyvalue:
                #    similarity -= 1 #these two lines would require that variables are matched only to existing values; uncomment if you want that
                var = varval.variables
                for each in self.boundvars.get("~=" + var, set()):
                    if each == matching_val:
                        if partialmatching:
                            similarity += utilities.get_similarity(
                                self._similarities, each, matching_val,
                                mismatch_penalty
                            )  # mismatch: otherchunk's value is among the values excluded by ~=x
                        else:
                            return False
                try:
                    if self.boundvars["=" + var] != matching_val:
                        if partialmatching:
                            similarity += utilities.get_similarity(
                                self._similarities, self.boundvars["=" + var],
                                matching_val, mismatch_penalty
                            )  # mismatch: the value bound to =x differs from otherchunk's value
                        else:
                            return False
                except KeyError:
                    self.boundvars.update({
                        "=" + var: matching_val
                    })  # if boundvars lack =x, bind it to otherchunk's value and proceed

            # checking negvariables, e.g., ~=x
            if varval.negvariables:
                for var in varval.negvariables:
                    try:
                        if self.boundvars["=" + var] == matching_val:
                            if partialmatching:
                                similarity += utilities.get_similarity(
                                    self._similarities,
                                    self.boundvars["=" + var], matching_val,
                                    mismatch_penalty
                                )  # mismatch: the value bound to =x equals otherchunk's value, which ~=x forbids
                            else:
                                return False
                    except KeyError:
                        pass  # =x is not bound yet, so there is nothing to contradict
                    # remember the value as excluded for this variable
                    self.boundvars.setdefault("~=" + var,
                                              set([])).add(matching_val)

            # checking values, e.g., 10 or !10

            if varval.values:
                val = varval.values
                if val != None and val != matching_val:  # None is the missing value of the attribute
                    if partialmatching:
                        similarity += utilities.get_similarity(
                            self._similarities, val, matching_val,
                            mismatch_penalty)
                    else:
                        return False
            # checking negvalues, e.g., ~!10
            if varval.negvalues:
                for negval in varval.negvalues:
                    # mismatch when the forbidden value is present, or when the
                    # forbidden value denotes "empty" and the slot is empty
                    if negval == matching_val or (
                            negval in {self.__emptyvalue, 'None'}
                            and matching_val == self.__emptyvalue):
                        if partialmatching:
                            similarity += utilities.get_similarity(
                                self._similarities, negval, matching_val,
                                mismatch_penalty)
                        else:
                            return False
        if partialmatching:
            return similarity
        else:
            return True