Ejemplo n.º 1
0
def extract_named_descriptors_of_seq(sequence):
    '''
    Build a single descriptor mapping for an amino-acid sequence.

    The input is coerced with ``str()``. When the resulting text has three
    characters or fewer, no descriptor is computed and an empty dict is
    returned. Otherwise the result of ``PseudoAAC.GetPseudoAAC`` is used as
    the base mapping and every following descriptor call is merged into it
    with ``update()``, in call order.

    :param sequence: amino-acid sequence (any object accepted by ``str()``)
    :return: the merged descriptor mapping, or ``{}`` for short inputs
    '''
    seq_text = str(sequence)
    if len(seq_text) <= 3:
        return {}

    # Descriptors computed directly from the sequence text.
    # ("lamda" is the keyword name as spelled at this call site.)
    descriptors = PseudoAAC.GetPseudoAAC(
        seq_text,
        lamda=3,
        weight=0.05,
        AAP=[
            _Hydrophobicity, _hydrophilicity,
            _residuemass, _pK1, _pK2, _pI,
        ],
    )
    descriptors.update(AAComposition_extra_PS.CalculateAAComposition(seq_text))
    descriptors.update(CTD.CalculateCTD(seq_text))

    # Descriptors computed from the molecule object built from the sequence.
    peptide_mol = Chem.MolFromSequence(seq_text)
    descriptors.update(kappa.GetKappa(peptide_mol))
    descriptors.update(charge.GetCharge(peptide_mol))
    descriptors.update(moreaubroto.GetMoreauBrotoAuto(peptide_mol))
    descriptors.update(molproperty.GetMolecularProperty(peptide_mol))
    descriptors.update(basak.Getbasak(peptide_mol))

    return descriptors
Ejemplo n.º 2
0
    def main_process(self, str_pep_file, str_des_out):
        """Compute one descriptor record per molecule of every peptide structure file.

        The peptide file is read with ``Str_DS_class().read_pep_file``, which
        yields the peptide names and the class labels. ``structure_gen`` is
        then called on the names (presumably it writes ``./strs/<name>.sdf``
        -- TODO confirm), and every molecule found in each of those SDF files
        is passed to ``new_Des_gene``.

        Args:
            str_pep_file: path of the peptide input file; stored on the
                instance and passed to ``read_pep_file``.
            str_des_out: stored on the instance only; this method does not
                write to it (the CSV export below is disabled).

        Returns:
            A ``(DataFrame, list_class_label)`` tuple: the DataFrame is built
            from the list of ``new_Des_gene`` results, the labels are returned
            exactly as ``read_pep_file`` produced them.
        """
        self.str_pep_file = str_pep_file
        self.str_des_out = str_des_out

        my_pep, list_class_label = Str_DS_class().read_pep_file(
            self.str_pep_file)
        Str_DS_class().structure_gen(my_pep)

        sValues = []

        # NOTE(review): the progress-bar maximum is the number of *.sdf files
        # present in ./strs, while the bar advances once per molecule of the
        # peptides in my_pep -- the two counts may differ; confirm intent.
        sdf_names = glob.glob(os.path.join("./strs", '*.sdf'))
        n = len(sdf_names)
        # Parenthesized single-argument print: same output on Python 2 and 3
        # (the bare print statement is a SyntaxError on Python 3).
        print("Descriptors being calculated !")

        bar2 = Bar('Processing SDF file', fill='>', max=n)

        for sdn in my_pep:
            mols = Chem.SDMolSupplier(os.path.join("./strs", sdn + ".sdf"))
            for mol in mols:
                sValue = Str_DS_class().new_Des_gene(mol)
                sValues.append(sValue)
                bar2.next()
        sDF1 = pd.DataFrame(sValues)
        #sDF1.to_csv(self.str_des_out, index = False)
        bar2.finish()

        print("Descripor calculation has been finished ")

        return sDF1, list_class_label
Ejemplo n.º 3
0
 def smiles_converter(self, smi, L):
     """Compute descriptors for one SMILES string and append them to ``L``.

     Descriptor calculation is best-effort: the constitutional,
     connectivity and MACCS-fingerprint results are collected first, and
     if any of them fails whatever was collected before the failure is
     kept and the remaining groups are skipped. The kappa, MOE and charge
     groups are each optional and fail independently of one another.

     Args:
         smi: SMILES string; must support ``.strip()``.
         L: list-like collector; exactly one dict is appended per call.

     Returns:
         None. The appended dict merges all collected descriptor dicts
         (later groups override earlier ones on a key clash) and always
         carries the stripped input under the ``"smiles"`` key, even when
         no descriptor could be computed.
     """
     res = []
     # ``except Exception`` (not a bare ``except``) keeps the best-effort
     # behaviour for descriptor errors while letting KeyboardInterrupt and
     # SystemExit propagate, so a long batch can still be interrupted.
     try:
         mol = Chem.MolFromSmiles(smi)
         res.append(constitution.GetConstitutional(mol))
         res.append(connectivity.GetConnectivity(mol))
         # Element [1] of the fingerprint result is the part merged below.
         res.append(fingerprint.CalculateMACCSFingerprint(mol)[1])
         try:
             res.append(kappa.GetKappa(mol))
         except Exception:
             pass  # optional descriptor group
         try:
             # ``drug`` works from the SMILES text, not from ``mol``.
             drug.ReadMolFromSmile(smi)
             res.append(drug.GetMOE())
         except Exception:
             pass  # optional descriptor group
         try:
             res.append(charge.GetCharge(mol))
         except Exception:
             pass  # optional descriptor group
     except Exception:
         pass  # keep whatever was collected before the failure

     # Flatten the per-group dicts into one record; a dict keeps one value
     # per key, so a later group overrides an earlier one on a clash.
     super_dict = {}
     for d in res:
         super_dict.update(d)
     super_dict["smiles"] = smi.strip()
     L.append(super_dict)
Ejemplo n.º 4
0
def get_f1_f2(row):
    """Compute both DPI feature sets for one row and store them in module-level lists.

    For the given row the protein sequence is fetched by ``target_id``, its
    amino-acid composition is read from ``dpi``, and the kappa descriptors of
    the molecule parsed from ``smiles`` are computed. The two combined
    features are appended to ``result_list_F1`` and ``result_list_F2``.

    Whenever ``row.name`` is a multiple of 500, the results and errors
    accumulated so far are dumped to CSV files under ``export_path`` before
    the current row's features are appended.

    Any ordinary exception raised while processing the row (including during
    the partial export) is recorded in ``target_id_errors`` as the row's
    ``smiles`` and ``target_id`` instead of being propagated.

    :param row: mapping-like row exposing ``'target_id'``, ``'smiles'`` and a
        ``name`` attribute (looks like a pandas row passed by
        ``DataFrame.apply`` -- TODO confirm against the caller)
    :return: None; results are accumulated in the module-level lists
    """
    try:
        protein_sequence = dpi.GetProteinSequenceFromID(row['target_id'])
        dpi.ReadProteinSequence(protein_sequence)
        aa_composition = dpi.GetAAComp()  # amino-acid composition
        molecule = Chem.MolFromSmiles(row['smiles'])
        kappa_descriptors = kappa.GetKappa(molecule)

        # To ease a long run, progress is saved little by little: every 500th
        # row the accumulated features and the smiles/target_id of the rows
        # that raised errors are written to files.
        # NOTE(review): assumes row.name is an integer index label; any other
        # label type would raise here and the row would be logged as an error.
        if row.name % 500 == 0:
            partial = pd.DataFrame(result_list_F1)
            partial.to_csv(export_path + "export_partial_f1.csv")
            partial = pd.DataFrame(result_list_F2)
            partial.to_csv(export_path + "export_partial_f2.csv")
            partial = pd.DataFrame(target_id_errors)
            partial.to_csv(export_path + "errors.csv")

        result_list_F1.append(
            dpi.GetDPIFeature1(kappa_descriptors, aa_composition))
        result_list_F2.append(
            dpi.GetDPIFeature2(kappa_descriptors, aa_composition))
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop a long run; every other
        # failure is logged per row and processing continues.
        dic = {'smiles': row['smiles'], 'target_id': row['target_id']}
        target_id_errors.append(dic)

    print(row.name)