Example #1
0
    def _read_header(self):
        """Private method called by ``__init__`` to read all file headers into the class attributes and calculate
        sequence dependent values.

        Every file listed in ``self.filenames`` is opened from ``self.directory``. Its first four lines are taken
        as header (name, sequence, concentration, solvent -- in that order) and the following
        ``self.wmax - self.wmin + 1`` lines as comma separated ``wavelength,ellipticity`` data.

        Line terminators are stripped with ``rstrip('\\r\\n')`` so that the stored name and solvent are clean for
        both CRLF and LF terminated input (text mode usually hands over ``\\n`` only, which a plain
        ``split('\\r\\n')`` leaves in place).

        :return: headers in class attributes.
        :raises StopIteration: if a file holds fewer lines than the header plus the expected data lines.
        """

        d = GlobalDescriptor('X')  # template descriptor, re-used for every sequence
        n_data_lines = (self.wmax - self.wmin) + 1  # number of data lines following the 4 header lines

        # loop through all files in the directory
        for filename in self.filenames:
            with open(join(self.directory, filename)) as f:
                head = [next(f) for _ in range(4)]
                data = [next(f) for _ in range(n_data_lines)]

            # read headers into class attributes
            name, sequence, umol_raw, solvent = (line.rstrip('\r\n') for line in head)
            sequence = sequence.strip()
            umol = float(umol_raw)
            self.names.append(name)
            self.sequences.append(sequence)
            self.conc_umol.append(umol)
            self.solvent.append(solvent)

            # read CD data; float() tolerates the trailing line terminator of the second column
            rows = []
            for line in data:
                fields = line.split(',')
                rows.append((int(fields[0]), float(fields[1])))
            self.circular_dichroism.append(np.array(rows))

            # calculate MW and transform concentration to mg/ml
            d.sequences = [sequence]
            d.calculate_MW(amide=self.amide)
            mw = d.descriptor[0][0]
            self.mw.append(mw)
            # use the value just computed instead of self.mw[i], which is only correct
            # when self.mw was empty before this method started
            self.conc_mgml.append(mw * umol / 10**6)
            # mean residue molecular weight (MW / n-1)
            self.meanres_mw.append(mw / (len(sequence) - 1))
Example #2
0
def predict():
    """Handle the single-sequence prediction page.

    Any request that is not a POST renders ``predictor.html``. On POST the
    submitted ``seq`` form field is written to ``random.fasta``, peptide and
    global descriptors are computed from ``/home/sanika/proj/random.fasta``,
    the model stored in ``ml_model.pkl`` is applied, a pie chart of the
    predicted probabilities is saved to disk and also embedded as base64 PNG,
    and ``seq.html`` is rendered with the predicted activity labels.

    NOTE(review): the FASTA file is written with a relative path but read back
    through an absolute one -- this only works when the working directory is
    ``/home/sanika/proj``; confirm against the deployment.
    """
    if request.method != 'POST':
        return render_template('predictor.html')

    submitted = request.form['seq']
    with open("random.fasta", "w") as handle:
        handle.write(submitted)

    # Eisenberg consensus scale is the starting scale for the AA-scale descriptors
    aa_scale_desc = PeptideDescriptor('/home/sanika/proj/random.fasta',
                                      'eisenberg')
    global_desc = GlobalDescriptor('/home/sanika/proj/random.fasta')

    # ---- amino acid scale based descriptors ----
    aa_scale_desc.calculate_global()  # global Eisenberg hydrophobicity
    aa_scale_desc.calculate_moment(append=True)  # Eisenberg hydrophobic moment
    aa_scale_desc.load_scale('gravy')
    aa_scale_desc.calculate_global(append=True)  # global GRAVY hydrophobicity
    aa_scale_desc.calculate_moment(append=True)  # GRAVY hydrophobic moment
    aa_scale_desc.load_scale('z3')
    aa_scale_desc.calculate_autocorr(1, append=True)  # window-1 autocorrelation on Z scale

    # ---- whole-sequence descriptors ----
    global_desc.length()
    global_desc.boman_index(append=True)
    global_desc.aromaticity(append=True)
    global_desc.aliphatic_index(append=True)
    global_desc.instability_index(append=True)
    global_desc.calculate_charge(ph=7.4, amide=False, append=True)
    global_desc.calculate_MW(amide=False, append=True)

    # feature matrix: global descriptor columns first, AA-scale columns after
    features = np.concatenate(
        (global_desc.descriptor, aa_scale_desc.descriptor), axis=1)

    model = joblib.load('ml_model.pkl')
    predicted = model.predict(features)
    probabilities = model.predict_proba(features).tocoo()
    class_columns = predicted.tocoo().col  # column index of every predicted label

    plt.pie(probabilities.data,
            labels=['antiviral', 'antibacterial', 'antifungal'],
            autopct='%.0f%%',
            shadow=True,
            radius=0.5)
    plt.savefig('/home/sanika/proj/pie_chart.jpg')

    # second copy of the figure, kept in memory so it can be inlined in the page
    png_buffer = BytesIO()
    plt.savefig(png_buffer, format='png')
    png_buffer.seek(0)
    encoded_png = base64.b64encode(png_buffer.getvalue()).decode('ascii')
    plt.close()

    # column 0 -> antiviral, column 1 -> antibacterial, anything else -> antifungal
    activities = [
        "antiviral" if column == 0 else
        "antibacterial" if column == 1 else "antifungal"
        for column in class_columns
    ]

    return render_template('seq.html', seq=activities, result=encoded_png)
Example #3
0
def upload():
    """Handle the batch prediction page for an uploaded FASTA file.

    Any request that is not a POST renders ``predictor.html``. On POST the
    uploaded ``file`` is checked with ``allowed_file``; a rejected upload
    renders ``upload.html`` with an error message. An accepted upload is saved
    into ``app.config['UPLOAD_FOLDER']``, parsed, turned into descriptor rows,
    classified row by row with the model in ``ml_model.pkl`` and the outcome
    is written to ``result.csv`` (columns ``id``, ``sequence``, ``activity``)
    in the working directory before ``up.html`` is rendered.

    Differences from the previous implementation:

    * the saved upload is read back through its full path inside the upload
      folder (it used to be opened by bare file name, i.e. relative to the
      working directory);
    * the uploaded file is removed even when processing raises;
    * the model is loaded once instead of once per sequence;
    * sequences are stored as plain strings in the result table.
    """
    if request.method != 'POST':
        # This will be executed on GET request.
        return render_template('predictor.html')

    upfile = request.files['file']
    if not (upfile and allowed_file(upfile.filename)):
        error = "PLEASE CHECK THE FORMAT OF FILE TO UPLOAD"
        return render_template('upload.html', error=error)

    filename = secure_filename(upfile.filename)
    upload_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    upfile.save(upload_path)

    try:
        identifiers = []
        sequence = []
        with open(upload_path) as fasta_file:
            for seq_record in SeqIO.parse(fasta_file, 'fasta'):
                identifiers.append(seq_record.id)
                # plain text keeps the table independent of Biopython's Seq type
                sequence.append(str(seq_record.seq))

        pepdesc = PeptideDescriptor(
            upload_path, 'eisenberg')  # use Eisenberg consensus scale
        globdesc = GlobalDescriptor(upload_path)

        # --------------- Peptide Descriptor (AA scales) Calculations ---------------
        pepdesc.calculate_global()  # global Eisenberg hydrophobicity
        pepdesc.calculate_moment(append=True)  # Eisenberg hydrophobic moment
        pepdesc.load_scale('gravy')  # load GRAVY scale
        pepdesc.calculate_global(append=True)  # global GRAVY hydrophobicity
        pepdesc.calculate_moment(append=True)  # GRAVY hydrophobic moment
        pepdesc.load_scale('z3')  # load old Z scale
        pepdesc.calculate_autocorr(
            1, append=True)  # global Z scale (=window1 autocorrelation)

        # --------------- Global Descriptor Calculations ---------------
        globdesc.length()  # sequence length
        globdesc.boman_index(append=True)  # Boman index
        globdesc.aromaticity(append=True)  # global aromaticity
        globdesc.aliphatic_index(append=True)  # aliphatic index
        globdesc.instability_index(append=True)  # instability index
        globdesc.calculate_charge(ph=7.4, amide=False,
                                  append=True)  # net charge
        globdesc.calculate_MW(amide=False, append=True)  # molecular weight

        # global descriptor columns first, AA-scale columns after
        result = np.concatenate((globdesc.descriptor, pepdesc.descriptor),
                                axis=1)

        clf = joblib.load('ml_model.pkl')  # loop invariant: load the model once
        activities = []
        for row in result:
            sample = np.reshape(row, (-1, 14))  # one sample with 14 features
            predicted_columns = clf.predict(sample).tocoo().col
            # column 0 -> antiviral, column 1 -> antibacterial, else antifungal
            labels = [
                "antiviral" if column == 0 else
                "antibacterial" if column == 1 else "antifungal"
                for column in predicted_columns
            ]
            activities.append('-'.join(labels))

        df = pd.DataFrame(data={
            "id": identifiers,
            "sequence": sequence,
            "activity": activities
        },
                          columns=['id', 'sequence', 'activity'])
        df.to_csv("result.csv", sep=',', index=False)
    finally:
        # never leave the upload behind, whether processing succeeded or not
        os.remove(upload_path)

    return render_template('up.html', mimetype="text/csv")
# Example script: compute amino-acid-scale and global descriptors for the
# sequences of one FASTA file and save each descriptor set to its own CSV file.
# NOTE(review): `pepdesc` is used below but is not created in this part of the
# script; presumably it is a PeptideDescriptor built from the same FASTA file
# with the Eisenberg scale -- confirm against the missing lines above.
globdesc = GlobalDescriptor('/path/to/sequences.fasta')

# --------------- Peptide Descriptor (AA scales) Calculations ---------------
pepdesc.calculate_global()  # calculate global Eisenberg hydrophobicity
pepdesc.calculate_moment(append=True)  # calculate Eisenberg hydrophobic moment

# load other AA scales
pepdesc.load_scale('gravy')  # load GRAVY scale
pepdesc.calculate_global(append=True)  # calculate global GRAVY hydrophobicity
pepdesc.calculate_moment(append=True)  # calculate GRAVY hydrophobic moment
pepdesc.load_scale('z3')  # load old Z scale
pepdesc.calculate_autocorr(
    1, append=True)  # calculate global Z scale (=window1 autocorrelation)

# save descriptor data to .csv file
# the header names one column per value computed above, after ID and Sequence
col_names1 = 'ID,Sequence,H_Eisenberg,uH_Eisenberg,H_GRAVY,uH_GRAVY,Z3_1,Z3_2,Z3_3'
pepdesc.save_descriptor('/path/to/descriptors1.csv', header=col_names1)

# --------------- Global Descriptor Calculations ---------------
# the first call starts the descriptor; the following ones pass append=True
globdesc.length()  # sequence length
globdesc.boman_index(append=True)  # Boman index
globdesc.aromaticity(append=True)  # global aromaticity
globdesc.aliphatic_index(append=True)  # aliphatic index
globdesc.instability_index(append=True)  # instability index
globdesc.calculate_charge(ph=7.4, amide=False, append=True)  # net charge
globdesc.calculate_MW(amide=False, append=True)  # molecular weight

# save descriptor data to .csv file
# header columns follow the order of the calculations above
col_names2 = 'ID,Sequence,Length,BomanIndex,Aromaticity,AliphaticIndex,InstabilityIndex,Charge,MW'
globdesc.save_descriptor('/path/to/descriptors2.csv', header=col_names2)
        desc.formula(amide=True)
        for v in desc.descriptor:
            formula_array.append(v[0])
    except:
        formula_array.append('')

database['formula'] = formula_array

print("Estimate molecular_weigth")
# Compute the molecular weight (amidated form) for each sequence in `database`
# and store the results in a new 'molecular_weigth' column.
molecular_weigth_array = []

for i in range(len(database)):
    try:
        desc = GlobalDescriptor([database['Sequence'][i]])
        desc.calculate_MW(amide=True)
        # NOTE(review): this stores the whole descriptor array, while the charge
        # loop of this script stores the scalar desc.descriptor[0][0] -- confirm
        # which form downstream code expects before changing it.
        molecular_weigth_array.append(desc.descriptor)
    except Exception:
        # Best effort: a sequence that cannot be processed gets an empty
        # placeholder so the column stays aligned with `database`.
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
        molecular_weigth_array.append('')

database['molecular_weigth'] = molecular_weigth_array

print("Estimate charge")
#calculate charge for each sequence
charge_array = []

for i in range(len(database)):
    try:
        desc = GlobalDescriptor([database['Sequence'][i]])
        desc.calculate_charge(ph=7, amide=True)
        charge_array.append(desc.descriptor[0][0])
Example #6
0
# Working directory of this service and the FASTA file every request is written to.
_SERVICE1_DIR = "../src/public/jobs/service1/"
_SERVICE1_FASTA = _SERVICE1_DIR + "service1.fasta"

# (result key, GlobalDescriptor method name, keyword arguments), in output order.
_GLOBAL_CALCULATIONS = (
    ("molecular_weigth", "calculate_MW", {"amide": True}),
    ("boman_index", "boman_index", {}),
    ("charge", "calculate_charge", {"ph": 7, "amide": True}),
    ("charge_density", "charge_density", {"ph": 7, "amide": True}),
    ("isoelectric_point", "isoelectric_point", {}),
    ("instability_index", "instability_index", {}),
    ("aromaticity", "aromaticity", {}),
    ("aliphatic_index", "aliphatic_index", {}),
    ("hydrophobic_ratio", "hydrophobic_ratio", {}),
)

# (result key, PeptideDescriptor method name, keyword arguments), in output order.
_PEPTIDE_CALCULATIONS = (
    ("hydrophobicity_profile", "calculate_profile", {"prof_type": "H"}),
    ("hydrophobic_profile", "calculate_profile", {"prof_type": "uH"}),
    ("calculate_moment", "calculate_moment", {}),
)


def _first_descriptor_value(make_descriptor, method, kwargs, numeric=True):
    """Run one descriptor calculation and return ``descriptor[0][0]``, or "-" if it fails."""
    try:
        desc = make_descriptor()
        getattr(desc, method)(**kwargs)
        value = desc.descriptor[0][0]
        # numeric results are cut to four decimals exactly like "%.4f" formats them
        return float("%.4f" % value) if numeric else value
    except Exception:
        # best effort: one failing property must not abort the whole report
        return "-"


def _sequence_properties(sequence):
    """Build the property dictionary (length, formula, descriptors) for one sequence string."""

    def global_descriptor():
        return GlobalDescriptor(sequence)

    def peptide_descriptor():
        return PeptideDescriptor(sequence, scalename='Eisenberg')

    props = {"length": len(sequence)}
    # the formula is text, so it is stored as returned instead of being rounded
    props["formula"] = _first_descriptor_value(
        global_descriptor, "formula", {"amide": True}, numeric=False)
    for key, method, kwargs in _GLOBAL_CALCULATIONS:
        props[key] = _first_descriptor_value(global_descriptor, method, kwargs)
    for key, method, kwargs in _PEPTIDE_CALCULATIONS:
        props[key] = _first_descriptor_value(peptide_descriptor, method, kwargs)
    return props


def exec(peptide, time_node):
    """Compute physico-chemical properties for the sequences of a FASTA text.

    The text is written to the service's FASTA file and parsed from there.
    For every record a dictionary with the keys ``length``, ``formula``,
    ``molecular_weigth``, ``boman_index``, ``charge``, ``charge_density``,
    ``isoelectric_point``, ``instability_index``, ``aromaticity``,
    ``aliphatic_index``, ``hydrophobic_ratio``, ``hydrophobicity_profile``,
    ``hydrophobic_profile`` and ``calculate_moment`` is produced; a property
    whose calculation fails is reported as ``"-"``.

    When the input holds exactly one record, the job directory
    ``<service dir>/<time_node>`` is created (a failure to create it only
    prints ``Error``) and a hydrophobicity profile plot (``profile.png``) and
    a helical wheel plot (``helical.png``) are written into it.

    :param peptide: FASTA formatted text with one or more sequences.
    :param time_node: name of the job sub-directory (string) used for the plots.
    :return: ``{record id: property dict}``, or the string ``"error"`` when
        the text contains no FASTA record.
    """
    with open(_SERVICE1_FASTA, "w") as handle:
        handle.write(peptide)

    # parse once; the records are needed for the emptiness check, the count and the results
    records = list(SeqIO.parse(_SERVICE1_FASTA, "fasta"))
    if not records:
        return "error"

    properties = {}
    for record in records:
        properties[str(record.id)] = _sequence_properties(str(record.seq))

    if len(records) == 1:
        # plots are only generated for single-sequence jobs
        sequence = str(records[0].seq)
        job_dir = _SERVICE1_DIR + time_node
        try:
            os.mkdir(job_dir)
        except OSError:
            print("Error")

        # generate plot profile
        plot_profile(sequence, scalename='eisenberg',
                     filename=job_dir + "/profile.png")
        # generate helical wheel
        helical_wheel(sequence, colorcoding='charge', lineweights=False,
                      filename=job_dir + "/helical.png")

    return properties
Example #7
0
from modlamp.descriptors import GlobalDescriptor

# Classify peptides as active / inactive from MIC values given in ug/mL.
# Every line of 'Saureus.csv' is expected to hold three comma separated
# columns: sequence, MIC value, unit.
sequences = []
MIC = []
units = []
actives = {}

# read the file with 3 columns containing MIC values
with open('Saureus.csv', 'r') as f:
    for line in f:
        fields = line.split(',')  # split once instead of once per column
        sequences.append(fields[0])
        MIC.append(fields[1])
        units.append(fields[2])

D = GlobalDescriptor(sequences)
D.calculate_MW()
MW = D.descriptor.tolist()

for i, u in enumerate(units):
    # The unit is the last column, so it still carries the line terminator.
    # Comparing the stripped text works for "\r\n" and "\n" alike; a literal
    # 'ug/ml\r\n' never matches once text mode has translated newlines.
    if u.strip() != 'ug/ml':  # only MIC values in ug/mL are converted
        continue

    if '+' in MIC[i]:
        # value given with stdev: take the upper bound (value + stdev)
        parts = MIC[i].split('+')
        mic = float(parts[0]) + float(parts[1])
    elif '-' in MIC[i]:
        # two values separated by '-': be conservative and take the second one
        mic = float(MIC[i].split('-')[1])
    else:
        mic = float(MIC[i])

    # convert ug/mL to uM using the molecular weight, rounded to one decimal
    actives[sequences[i]] = round((mic / float(MW[i][0])) * 1000., 1)

# split at 100 uM
s_inactive = [s for s, v in actives.items() if v > 100.0]
s_active = [s for s, v in actives.items() if v <= 100.0]