max_sim = 0
                    for active_indexes in model_configuration["data"]["active"]:
                        intersection = _intersection_of_two_arrays(active_indexes, test_indexes)
                        sim = len(intersection) / (
                                    len(test_indexes) + len(active_indexes) - len(intersection))
                        if sim > max_sim:
                            max_sim = sim
                    score = {
                        "name": line["name"],
                        "score": max_sim
                    }
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def _intersection_of_two_arrays(arr1: list, arr2: list):
    and_arr = []
    for item in arr1:
        for i in range(len(arr2)):
            if item == arr2[i]:
                and_arr.append(item)
                arr2 = arr2[:i] + arr2[(i+1):]
                break
    return and_arr


register_model(MyNbitHashedApModel.model_name, lambda: MyNbitHashedApModel())
Esempio n. 2
0
                        for group in model_configuration["configuration"][
                                "groups"]:
                            if index in group:
                                index = group[0]
                                break
                        if index not in test_active_indexes:
                            test_active_indexes.append(index)
                    max_sim = max([
                        _compute_sim(item, test_active_indexes)
                        for item in model_configuration["data"]["active"]
                    ])
                    score = {"name": line["name"], "score": max_sim}
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def _compute_sim(active_fragments: list, test_fragments: list) -> list:
    summary = 0
    for item in test_fragments:
        if item in active_fragments:
            summary += 1
    sim = summary / (len(active_fragments) + len(test_fragments) - summary)
    return sim


register_model(DeleteIndexGroupModel.model_name,
               lambda: DeleteIndexGroupModel())
Esempio n. 3
0
            molecule = Chem.MolFromSmiles(molecule_smiles)
            ecfp_fingerprint = AllChem.GetHashedMorganFingerprint(molecule,
                                                                  radius,
                                                                  nBits=nbits)
            active_molecules_ecfp.append(ecfp_fingerprint)

        first_line = True
        with open(output_file, "w", encoding="utf-8") as output_stream:
            with open(fragments_file, "r", encoding="utf-8") as input_stream:
                for new_line in input_stream:
                    line = json.loads(new_line)
                    test_molecule_input = line["smiles"]
                    test_molecule_smiles = test_molecule_input.strip("\"")
                    test_molecule = Chem.MolFromSmiles(test_molecule_smiles)
                    test_mol_fingerprint = AllChem.GetHashedMorganFingerprint(
                        test_molecule, radius, nBits=nbits)
                    max_sim = max([
                        DataStructs.TanimotoSimilarity(test_mol_fingerprint,
                                                       fingerprint)
                        for fingerprint in active_molecules_ecfp
                    ])
                    score = {"name": line["name"], "score": max_sim}
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


register_model(NbitEcfpModelHashed.model_name, lambda: NbitEcfpModelHashed())
Esempio n. 4
0
    for item in fragments:
        if item["index"] not in molecule_indexes:
            molecule_indexes.append(item["index"])
    for group in groups:
        founded_pos = False
        founded_neg = False
        for item in group:
            if (founded_pos is False) and (item in molecule_indexes):
                founded_pos = True
            elif (founded_neg is False) and (item not in molecule_indexes):
                founded_neg = True
            if founded_pos and founded_neg:
                break
        if founded_pos and founded_neg:
            for item in group:
                if item not in molecule_indexes:
                    molecule_indexes.append(item)
    return molecule_indexes


def _compute_sim(active_fragments: list, test_fragments: list) -> list:
    summary = 0
    for item in test_fragments:
        if item in active_fragments:
            summary += 1
    sim = summary / (len(active_fragments) + len(test_fragments) - summary)
    return sim


register_model(AddGroupModel.model_name, lambda: AddGroupModel())
    def save_to_json_file(self, output_file: str, model: dict):
        # Thin delegation: persisting the model dict is handled entirely
        # by the shared inputoutput_utils helper.
        inputoutput_utils.save_to_json_file(output_file, model)

    def score_model(self, model_configuration: dict, fragments_file: str,
                    descriptors_file: str, output_file: str):
        """Score every molecule in fragments_file against the active set.

        Each input line is a JSON object with "name" and "fragments"
        (each fragment carrying an "index").  The score is
        summary / (|active| + |distinct test indexes| - summary), where
        summary counts distinct test indexes that are also active.  One
        JSON object {"name", "score"} is written per output line.
        descriptors_file is accepted for interface compatibility but is
        not read here.
        """
        inputoutput_utils.create_parent_directory(output_file)
        # Hoist the active-index data out of the per-molecule loop and use
        # a set for O(1) membership tests; the original rescanned the
        # active list once per test index.  len() of the original list is
        # kept so duplicate active entries (if any) weigh the denominator
        # exactly as before.  Assumes indexes are hashable JSON scalars.
        active_indexes = model_configuration["data"]["active"]
        active_set = set(active_indexes)
        active_count = len(active_indexes)
        first_line = True
        with open(output_file, "w", encoding="utf-8") as output_stream:
            with open(fragments_file, "r", encoding="utf-8") as input_stream:
                for new_line in input_stream:
                    line = json.loads(new_line)
                    # Distinct fragment indexes of this molecule.
                    test_active_indexes = set()
                    for fragment in line["fragments"]:
                        test_active_indexes.add(fragment["index"])
                    summary = sum(
                        1 for test_index in test_active_indexes
                        if test_index in active_set)
                    sim = summary / (active_count
                                     + len(test_active_indexes) - summary)
                    score = {"name": line["name"], "score": sim}
                    # Newline-separated JSON objects, no trailing newline.
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


register_model(BaselineModel.model_name, lambda: BaselineModel())
            molecule_smiles = active_molecule.strip("\"")
            molecule = Chem.MolFromSmiles(molecule_smiles)
            fcfp_fingerprint = AllChem.GetMorganFingerprintAsBitVect(molecule, radius,
                                                                     useFeatures=True, nBits=nbits)
            active_molecules_fcfp.append(fcfp_fingerprint)

        first_line = True
        with open(output_file, "w", encoding="utf-8") as output_stream:
            with open(fragments_file, "r", encoding="utf-8") as input_stream:
                for new_line in input_stream:
                    line = json.loads(new_line)
                    test_molecule_input = line["smiles"]
                    test_molecule_smiles = test_molecule_input.strip("\"")
                    test_molecule = Chem.MolFromSmiles(test_molecule_smiles)
                    test_mol_fingerprint = AllChem.GetMorganFingerprintAsBitVect(test_molecule, radius,
                                                                        useFeatures=True, nBits=nbits)
                    max_sim = max([DataStructs.TanimotoSimilarity(test_mol_fingerprint, fingerprint)
                                   for fingerprint in active_molecules_fcfp])
                    score = {
                        "name": line["name"],
                        "score": max_sim
                    }
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


register_model(NbitFcfpModelVector.model_name, lambda: NbitFcfpModelVector())
            molecule_smiles = active_molecule.strip("\"")
            molecule = Chem.MolFromSmiles(molecule_smiles)
            ap_fingerprint = Pairs.GetHashedAtomPairFingerprint(molecule,
                                                                nBits=nbits)
            active_molecules_ap.append(ap_fingerprint)

        first_line = True
        with open(output_file, "w", encoding="utf-8") as output_stream:
            with open(fragments_file, "r", encoding="utf-8") as input_stream:
                for new_line in input_stream:
                    line = json.loads(new_line)
                    test_molecule_input = line["smiles"]
                    test_molecule_smiles = test_molecule_input.strip("\"")
                    test_molecule = Chem.MolFromSmiles(test_molecule_smiles)
                    test_mol_fingerprint = Pairs.GetHashedAtomPairFingerprint(
                        test_molecule, nBits=nbits)
                    max_sim = max([
                        DataStructs.TanimotoSimilarity(test_mol_fingerprint,
                                                       fingerprint)
                        for fingerprint in active_molecules_ap
                    ])
                    score = {"name": line["name"], "score": max_sim}
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


register_model(NbitAPModel.model_name, lambda: NbitAPModel())
                        indexes[item] += 1
        cut = num_of_active_mol * int(model_configuration["cutoff"]) / 100
        cutoff_indexes = []
        for item in indexes:
            if indexes[item] >= cut:
                cutoff_indexes.append(item)
        model = {
            "configuration": {
                "model_name": model_configuration["model_name"]
            },
            "data": {
                "active": cutoff_indexes
            }
        }
        return model

    def save_to_json_file(self, output_file: str, model: dict):
        # Thin delegation: persisting the model dict is handled entirely
        # by the shared inputoutput_utils helper.
        inputoutput_utils.save_to_json_file(output_file, model)

    def score_model(self, model_configuration: dict, fragments_file: str,
                    descriptors_file: str, output_file: str):
        """Delegate scoring to the plain BaselineModel.

        The cutoff only affects model creation (which indexes survive
        into the "active" set); scoring itself is identical to the
        baseline, so it is reused verbatim.
        """
        baseline_model = baseline.BaselineModel()
        baseline_model.score_model(model_configuration, fragments_file, descriptors_file,
                                   output_file)


register_model(CutOffModel.model_name, lambda: CutOffModel())


                                is_in_inactives = True
                                break
                        if is_in_actives & is_in_inactives is False:
                            suma += active_parameter
                        elif is_in_inactives & is_in_actives is False:
                            suma -= inactive_parameter
                        else:
                            suma += neutral_parameter
                    score = {
                        "name": name,
                        "score": suma / len(molecule["fragments"])
                    }
                    if first:
                        first = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def _select_indexes(input_file: str):
    indexes = []
    with open(input_file, "r") as stream:
        for line in stream:
            line_parts = line.split(",")
            indexes.append(line_parts[1])
    indexes.pop(0)
    return indexes


register_model(PosNegIndexModel.model_name, lambda: PosNegIndexModel())
            test_active_indexes.append(group)
    return test_active_indexes


def _compute_sim(active_fragments: list, test_fragments: list,
                 groups: list) -> float:
    """Similarity of two fragment lists with an extra group-based weight.

    Returns a float (the original ``-> list`` annotation was wrong).  The
    base count is the number of test fragments present among the active
    fragments; matched groups add their full length, and the denominator
    is scaled by the accumulated group length.
    """
    # groups_len starts at 1 (not 0) and then accumulates all group sizes,
    # so it is always >= 1.
    groups_len = 1
    for item in groups:
        groups_len += len(item)
    # Count test fragments also present among the active fragments.
    summary = 0
    for item in test_fragments:
        if item in active_fragments:
            summary += 1
    for group in groups:
        # NOTE(review): `group` is itself a list, so this tests whether
        # the whole group list is an element of active_fragments
        # (list-in-list membership), not whether its members are —
        # confirm this is intended.
        if group in active_fragments:
            founded = False
            for item in group:
                if item in test_fragments:
                    founded = True
                    break
            # A group counts in full if any of its members was tested.
            if founded:
                summary += len(group)
    # NOTE(review): dead branch — groups_len is initialised to 1 and only
    # ever grows, so it can never be 0 here.
    if groups_len == 0:
        groups_len = 1
    sim = summary / (groups_len *
                     (len(active_fragments) + len(test_fragments) - summary))
    return sim


register_model(GroupAndAddModel.model_name, lambda: GroupAndAddModel())
            with open(fragments_file, "r", encoding="utf-8") as input_stream:
                for new_line in input_stream:
                    line = json.loads(new_line)
                    test_active_indexes = []
                    for fragment in line["fragments"]:
                        if fragment["index"] not in test_active_indexes:
                            test_active_indexes.append(fragment["index"])
                    max_sim = max([
                        _compute_sim(item, test_active_indexes)
                        for item in model_configuration["data"]["active"]
                    ])
                    score = {"name": line["name"], "score": max_sim}
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def _compute_sim(active_fragments: list, test_fragments: list) -> list:
    summary = 0
    for item in test_fragments:
        if item in active_fragments:
            summary += 1
    sim = summary / (len(active_fragments) + len(test_fragments) - summary)
    return sim


register_model(BaselineActivePairModel.model_name,
               lambda: BaselineActivePairModel())
Esempio n. 12
0
                        num_of_fragment[i]:num_of_fragment[i + 1]]
                    sim = sum(prediction_of_molecule) / len(
                        prediction_of_molecule)
                    score = {"name": names_of_molecules[i], "score": sim}
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def extract_descriptors(input_file: str, model_configuration: dict) -> list:
    """Read descriptor rows from a CSV file as lists of floats.

    The header's second column tells us whether the file holds fragment
    rows ("index") or molecule rows (anything else); the configuration's
    "molecules" flag (1 = molecule descriptors expected, anything else =
    fragment descriptors expected) must agree with the file, otherwise we
    print "Wrong input" and exit(1) as before.  Every subsequent row
    contributes everything after its first column, converted to float.
    """
    descriptors = []
    with open(input_file, "r", encoding="utf-8") as stream:
        line = stream.readline()
        line_parts = line.split(",")
        is_fragment_file = line_parts[1] == "index"
        wants_molecules = int(model_configuration["molecules"]) == 1
        # Short-circuiting `and`/`or` replace the original bitwise `&`/`|`
        # applied to booleans; same truth table, idiomatic form.
        if ((is_fragment_file and wants_molecules)
                or (not is_fragment_file and not wants_molecules)):
            print("Wrong input")
            exit(1)
        for line in stream:
            line_parts = line.split(",")
            descriptors.append(list(map(float, line_parts[1:])))
    return descriptors


register_model(LinearRegressionModel.model_name,
               lambda: LinearRegressionModel())
Esempio n. 13
0
                        indexes.append(fragment["index"])
                    max_sim = 0
                    for active_indexes in model_configuration["data"]["active"]:
                        intersection = _intersection_of_two_arrays(active_indexes, indexes)
                        sim = len(intersection) / (len(indexes) + len(active_indexes) - len(intersection))
                        if sim > max_sim:
                            max_sim = sim
                    score = {
                        "name": line["name"],
                        "score": max_sim
                    }
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def _intersection_of_two_arrays(arr1: list, arr2: list):
    and_arr = []
    for item in arr1:
        for i in range(len(arr2)):
            if item == arr2[i]:
                and_arr.append(item)
                arr2 = arr2[:i] + arr2[(i+1):]
                break
    return and_arr


register_model(ControlModel.model_name, lambda: ControlModel())
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def _make_groups(indexes: list, groups: list) -> list:
    for group in groups:
        founded = True
        for item in group:
            if item not in indexes:
                founded = False
                break
        if founded:
            for item in group:
                indexes.remove(item)
            indexes.append(group)
    return indexes


def _compute_sim(active_fragments: list, test_fragments: list) -> list:
    summary = 0
    for item in test_fragments:
        if item in active_fragments:
            summary += 1
    sim = summary / (len(active_fragments) + len(test_fragments) - summary)
    return sim


register_model(CutoffActiveGroupModel.model_name, lambda: CutoffActiveGroupModel())


Esempio n. 15
0
                        sum -= inactive_parameter
                    counter += 1
                    if counter == name_num[molecule_num]["fragments"]:
                        score = {
                            "name": name_num[molecule_num]["molecule"],
                            "score": sum / counter
                        }
                        counter = 0
                        sum = 0
                        molecule_num += 1
                        if first_write:
                            first_write = False
                        else:
                            streamo.write("\n")
                        json.dump(score, streamo)


def _read_molecules(input_file: str):
    name_num = []
    with open(input_file, "r") as stream:
        for line in stream:
            molecule = json.loads(line)
            name = molecule["name"]
            num_fragments = len(molecule["fragments"])
            mol_frag = {"molecule": name, "fragments": num_fragments}
            name_num.append(mol_frag)
    return name_num


register_model(DescriptorsModel.model_name, lambda: DescriptorsModel())
Esempio n. 16
0
                else:
                    output_stream.write("\n")
                json.dump(score, output_stream)


def _extract_names(input_file: str) -> list:
    names = []
    with open(input_file, "r", encoding="utf-8") as stream:
        for new_line in stream:
            line = json.loads(new_line)
            names.append(line["name"])
    return names


def _extract_descriptors(input_file: str) -> list:
    descriptors = []
    with open(input_file, "r", encoding="utf-8") as stream:
        line = stream.readline()
        line_parts = line.split(",")
        if line_parts[1] == "index":
            print("Wrong input, expected molecules not fragments")
            exit(1)
        for line in stream:
            line_parts = line.split(",")
            descriptors.append(list(map(float, line_parts[1:])))
    return descriptors


register_model(DecisionTreeClassifier.model_name,
               lambda: DecisionTreeClassifier())
            model_configuration["configuration"]["active_parameter"])
        inactive_parameter = int(
            model_configuration["configuration"]["inactive_parameter"])
        with open(output_file, "w") as streamo:
            first = True
            with open(fragments_file, "r") as stream:
                for line in stream:
                    molecule = json.loads(line)
                    name = molecule["name"]
                    suma = 0
                    for fragment in molecule["fragments"]:
                        founded = False
                        for index in model_configuration["data"]:
                            if int(index) == int(fragment["index"]):
                                founded = True
                                break
                        if founded:
                            suma += active_parameter
                        else:
                            suma -= inactive_parameter
                    sim = suma / len(molecule["fragments"])
                    score = {"name": name, "score": sim}
                    if first:
                        first = False
                    else:
                        streamo.write("\n")
                    json.dump(score, streamo)


register_model(PositiveIndexModel.model_name, lambda: PositiveIndexModel())
            molecule = Chem.MolFromSmiles(molecule_smiles)
            fcfp_fingerprint = AllChem.GetMorganFingerprint(molecule,
                                                            radius,
                                                            useFeatures=True)
            active_molecules_fcfp.append(fcfp_fingerprint)

        first_line = True
        with open(output_file, "w", encoding="utf-8") as output_stream:
            with open(fragments_file, "r", encoding="utf-8") as input_stream:
                for new_line in input_stream:
                    line = json.loads(new_line)
                    test_molecule_input = line["smiles"]
                    test_molecule_smiles = test_molecule_input.strip("\"")
                    test_molecule = Chem.MolFromSmiles(test_molecule_smiles)
                    test_mol_fingerprint = AllChem.GetMorganFingerprint(
                        test_molecule, radius, useFeatures=True)
                    max_sim = max([
                        DataStructs.TanimotoSimilarity(test_mol_fingerprint,
                                                       fingerprint)
                        for fingerprint in active_molecules_fcfp
                    ])
                    score = {"name": line["name"], "score": max_sim}
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


register_model(RdkitFcfpModel.model_name, lambda: RdkitFcfpModel())
                    sim = summary / (len(model_configuration["data"]["active"]) +
                                     len(test_active_indexes) - summary)
                    score = {
                        "name": line["name"],
                        "score": sim
                    }
                    if first_line:
                        first_line = False
                    else:
                        output_stream.write("\n")
                    json.dump(score, output_stream)


def _make_groups(indexes: list, groups: list) -> list:
    for group in groups:
        founded = True
        for item in group:
            if item not in indexes:
                founded = False
                break
        if founded:
            for item in group:
                indexes.remove(item)
            indexes.append(group)
    return indexes


register_model(EcfpGroupModel.model_name, lambda: EcfpGroupModel())