def main():
    """Ask for a protein FASTA file and report parameters of its sequence.

    Reads the FASTA path from standard input, loads the sequence with
    ``read_fasta`` and prints the sequence followed by its molecular weight,
    amino acid counts and isoelectric point.  The same three values are
    written, one per line, to ``Valgu_parameetrid.txt`` in the current
    working directory (the file is overwritten).
    """
    fasta = input()
    sequence = read_fasta(fasta)
    print(sequence)

    analysed_seq = ProteinAnalysis(str(sequence))
    # Compute every parameter once and reuse it for the console and the file.
    molecular_weight = analysed_seq.molecular_weight()
    amino_acid_counts = analysed_seq.count_amino_acids()
    isoelectric_point = analysed_seq.isoelectric_point()

    print("\n", "Molekulaarmass:", molecular_weight)
    print("\n", "Aminohapete arv:", amino_acid_counts)
    print("\n", "Isoelektriline punkt:", isoelectric_point)

    # The context manager closes the file even if a write fails.
    with open("Valgu_parameetrid.txt", "w") as text_file:
        text_file.write(str(molecular_weight))
        text_file.write("\n")
        text_file.write(str(amino_acid_counts))
        text_file.write("\n")
        text_file.write(str(isoelectric_point))
def protParam(seq):
    """Print a ProtParam-style report for the protein sequence ``seq``.

    The report lists amino acid counts and fractions, molecular weight,
    GRAVY, isoelectric point, aromaticity and an extinction coefficient
    derived from the W, Y and C counts.  Nothing is returned.

    :param seq: protein sequence passed unchanged to ``ProteinAnalysis``.
    """
    params = ProteinAnalysis(seq)
    mw = params.molecular_weight()
    c_aa = params.count_amino_acids()
    p_aa = params.get_amino_acids_percent()
    gravy = params.gravy()
    aromaticity = params.aromaticity()
    isoelectric_point = params.isoelectric_point()

    # Per-residue contributions: W 5690, Y 1280, C 120.
    # NOTE(review): the message below says "Assuming reduced" although a
    # cysteine term is included - confirm which model is intended.
    ext_coeff = c_aa["W"] * 5690 + c_aa["Y"] * 1280 + c_aa["C"] * 120

    print("Amino acid count")
    pprint.pprint(c_aa)
    print("Amino acid percent")
    pprint.pprint(p_aa)
    print("Molecular weight")
    print("%f Da" % mw)
    print("Gravy")
    print(gravy)
    print("Isoelectric point")
    print(isoelectric_point)
    print("Aromaticity")
    print(aromaticity)
    print("Extinction coefficient: %d M-1cm-1 (Assuming reduced)" % ext_coeff)
    print("")
def get_protein_analysis(aa):
    """Return a flat list of ProteinAnalysis descriptors for ``aa``.

    The list holds, in order: molecular weight, aromaticity, instability
    index, isoelectric point and GRAVY, followed by the elements returned by
    ``secondary_structure_fraction()``.
    """
    analysis = ProteinAnalysis(aa)
    scalar_getters = (
        analysis.molecular_weight,
        analysis.aromaticity,
        analysis.instability_index,
        analysis.isoelectric_point,
        analysis.gravy,
    )
    descriptors = [getter() for getter in scalar_getters]
    descriptors.extend(analysis.secondary_structure_fraction())
    return descriptors
def draw_sequence(sequence, mode = 'simple', alphabet = None):
    """Render ``sequence`` as web2py HTML helpers.

    :param sequence: the sequence to draw; it is enumerated position by
        position and, in ``'protparams'`` mode, handed to ``ProteinAnalysis``.
    :param mode: ``'simple'`` returns only the numbered sequence DIV;
        ``'protparams'`` returns a DIV holding the numbered sequence followed
        by a "Protein Parameters" table (length, MW, pI).
    :param alphabet: accepted for interface compatibility; not used here.
    :return: a DIV helper, or ``None`` when ``mode`` is not recognised.
    """
    if mode == 'protparams':
        returndiv = DIV()
        from Bio.SeqUtils.ProtParam import ProteinAnalysis
        returndiv.append(_numbered_sequence_div(sequence))
        returndiv.append(H3('Protein Parameters'))
        params_table = TABLE(_style= "width:200px;")
        protpar = ProteinAnalysis(sequence)
        params_table.append(TR(SPAN('Length:', _class = 'line-header'),
                               '%i aa' % len(sequence)))
        # A parameter that cannot be computed (e.g. the sequence contains a
        # residue the lookup tables do not know) simply gets no table row.
        # Biopython signals this with KeyError or ValueError depending on the
        # method and release, so both are tolerated.
        try:
            params_table.append(TR(SPAN('MW:', _class = 'line-header'),
                                   '%i KDa' % round(protpar.molecular_weight()/1000, 0)))
        except (KeyError, ValueError):
            pass
        try:
            params_table.append(TR(SPAN('pI:', _class = 'line-header'),
                                   '%1.2f' % protpar.isoelectric_point()))
        except (KeyError, ValueError):
            pass
        returndiv.append(params_table)
        return returndiv
    if mode == 'simple':
        return _numbered_sequence_div(sequence)
    # Unknown modes yield nothing, as before; made explicit here.
    return None


def _numbered_sequence_div(sequence):
    """Build the monospace DIV that shows ``sequence`` in numbered rows."""
    seq_div = DIV(_style='font-family:monospace', _class='raw-sequence')
    # Width of the row labels: digits of the sequence length plus one.
    spacer = len(str(len(sequence))) + 1

    def row_start(position):
        # NOTE(review): the replacement text appears as a plain space in this
        # copy of the file; if an HTML non-breaking space was intended,
        # restore it from the original source.
        return XML((str(position) + ' ').rjust(spacer).replace(' ', ' '))

    for i, pos in enumerate(sequence):
        if i == 0:
            seq_div.append(row_start(i + 1))
        else:
            if i % 10 == 0:
                # Gap between blocks of ten residues.
                seq_div.append(' ')
            if i % 60 == 0:
                # Close the row with its last position, break the line and
                # open the next row with its first position.
                seq_div.append(XML((str(i)).ljust(spacer).replace(' ', ' ')))
                seq_div.append(BR())
                seq_div.append(row_start(i + 1))
        seq_div.append(SPAN(pos, _class='seq-position', _title = i + 1))
    return seq_div
def properties(toxin_faa,antitoxin_faa,out):
    """Write a table of protein properties for paired genes and return its path.

    Both FASTA files are parsed; for every record ``getNameAndPosition``
    supplies a locus name and start position (records without a start are
    skipped).  Isoelectric point, molecular weight and instability index are
    collected per locus, the loci are passed through ``orderPairs`` and every
    locus that ends up with exactly two genes becomes one tab-separated row.

    :param toxin_faa: path of the first protein FASTA file.
    :param antitoxin_faa: path of the second protein FASTA file.
    :param out: output prefix; the table is written to
        ``<out>.properties.txt``.
    :return: the path of the file that was written.
    """
    # Build a dictionary of {locus:[{properties:values},{properties:values}]}
    from collections import defaultdict
    from Bio import SeqIO

    loci = defaultdict(list)
    for f in [toxin_faa, antitoxin_faa]:
        # Parse FASTA files.  Plain 'r' already gives universal newlines on
        # Python 3, where the old 'rU' mode is no longer accepted.
        with open(f, 'r') as handle:
            for record in SeqIO.parse(handle, 'fasta'):
                locus, start = getNameAndPosition(record)
                if not start:
                    continue
                aaseq = str(record.seq).strip("*")
                # Sequences with missing positions or premature stops are
                # not analysed; they get 0 as a flag for missing data.
                if "*" not in aaseq and "X" not in aaseq:
                    data = ProteinAnalysis(aaseq)
                    loci[locus].append({
                        'start': start,
                        'pI': data.isoelectric_point(),
                        'weight': data.molecular_weight(),
                        'instability': data.instability_index()
                    })
                else:
                    loci[locus].append({
                        'start': start,
                        'pI': 0,
                        'weight': 0,
                        'instability': 0
                    })

    # Order genes in a locus positionally
    loci = orderPairs(loci)

    # Write to output file
    outfile = ".".join([out, "properties", "txt"])
    with open(outfile, 'w') as o:
        header = "\t".join(["locus",
                            "gene1_pI", "gene2_pI",
                            "gene1_weight", "gene2_weight",
                            "gene1_instability", "gene2_instability"
                            ])
        o.write("#" + header.upper() + "\n")
        # items() exists on Python 2 and 3; iteritems() is Python 2 only.
        for locus, gene in loci.items():
            if len(gene) != 2:
                continue
            line = map(str, [
                locus, gene[0]['pI'], gene[1]['pI'],
                gene[0]['weight'], gene[1]['weight'],
                gene[0]['instability'], gene[1]['instability']
            ])
            o.write("\t".join(line) + "\n")
    return outfile
def main(inputfile="/isi/olga/xin/Halophile_project/output/20160421/SS37_aa.faa",
         outputfile="/isi/olga/xin/Halophile_project/output/20160421/SS37_reads_isp.txt"):
    """Write the isoelectric point of every protein in a FASTA file.

    :param inputfile: protein FASTA file to read; defaults to the path the
        script was originally hard-wired to.
    :param outputfile: tab-separated file to write, one row per record with
        the row index, the record id and its isoelectric point (no header).
    """
    ieps = []
    seqid = []
    # Plain 'r' replaces the Python-2-era 'rU' mode, and the context manager
    # closes the handle, which the previous code never did.
    with open(inputfile, 'r') as f:
        for record in SeqIO.parse(f, "fasta"):
            seqid.append(record.id)
            seq_pa = ProteinAnalysis(str(record.seq))
            ieps.append(seq_pa.isoelectric_point())
    # column_stack yields one string array, so ids and values share a dtype.
    read_ieps = np.column_stack((seqid, ieps))
    df = pd.DataFrame(read_ieps)
    df.to_csv(outputfile, sep = '\t', header = False)
def __init__(self, sequence):
    """Compute and store the descriptors of ``sequence`` as attributes.

    Values come from three places: a ``ProteinAnalysis`` object, the
    ``calculate_*`` helpers called below, and expressions evaluated through
    ``r(...)``.

    :param sequence: the sequence to analyse; it is passed to ``len``,
        ``ProteinAnalysis``, the ``calculate_*`` helpers and formatted into
        an R assignment.
    """
    self.sequence = sequence
    self.sequence_length = len(sequence)
    analysis = ProteinAnalysis(sequence)
    self.amino_acid_percents = analysis.get_amino_acids_percent()
    self.amino_acids_composition = calculate_amino_acids_composition(sequence)
    self.aromaticity = analysis.aromaticity()
    self.instability = analysis.instability_index()
    self.flexibility = calculate_flexibility(sequence)
    # Each entry pairs a display name with a per-residue scale; the scale
    # objects (hw, em, ja, ...) are defined outside this method.
    protein_scale_parameters = [{'name': 'Hydrophilicity', 'dictionary': hw},
                                {'name': 'Surface accessibility', 'dictionary': em},
                                {'name': 'Janin Interior to surface transfer energy scale', 'dictionary': ja},
                                {'name': 'Bulkiness', 'dictionary': bulkiness},
                                {'name': 'Polarity', 'dictionary': polarity},
                                {'name': 'Buried residues', 'dictionary': buried_residues},
                                {'name': 'Average area buried', 'dictionary': average_area_buried},
                                {'name': 'Retention time', 'dictionary': retention_time}]
    self.protein_scales = calculate_protein_scales(analysis, protein_scale_parameters)
    self.isoelectric_point = analysis.isoelectric_point()
    self.secondary_structure_fraction = calculate_secondary_structure_fraction(analysis)
    self.molecular_weight = analysis.molecular_weight()
    # The attribute is named kyte_plot but stores the single gravy() value.
    self.kyte_plot = analysis.gravy()
    self.pefing = calculate_pefing(sequence)
    # next parameters are calculated using R.Peptides
    r('require(Peptides)')
    # Defines the variable `sequence` on the R side; the r(...) calls below
    # refer to it, so this statement has to stay ahead of them.
    # NOTE(review): the sequence text is interpolated into R source
    # unescaped - confirm callers never pass untrusted input.
    r('sequence = "{0}"'.format(sequence))
    self.aliphatic_index = r('aindex(sequence)')[0]
    self.boman_index = r('boman(sequence)')[0]
    self.charges = calculate_charges(sequence, 1.0, 14.0, 0.5, 'Lehninger')
    # NOTE(review): this evaluates R's `seq`, not an obviously
    # hydrophobicity-related function - verify the intended call.
    self.hydrophobicity = r('seq(sequence)')[0]
    # Named angles (presumably degrees - TODO confirm) handed to the
    # hydrophobic-moment and peptide-type helpers.
    angles = [{'name': 'Alpha-helix', 'angle': -47},
              {'name': '3-10-helix', 'angle': -26},
              {'name': 'Pi-helix', 'angle': -80},
              {'name': 'Omega', 'angle': 180},
              {'name': 'Antiparallel beta-sheet', 'angle': 135},
              {'name': 'Parallel beta-sheet', 'angle': 113}]
    # The extra helix type is only considered when the P and G entries of
    # amino_acid_percents sum to more than 0.3.
    if self.amino_acid_percents['P'] + self.amino_acid_percents['G'] > 0.3:
        angles.append({'name': 'Polygly-polypro helix', 'angle': 153})
    self.hydrophobic_moments = calculate_hydrophobic_moments(sequence, angles)
    self.kidera_factors = calculate_kidera_factors(sequence)
    self.peptide_types = calculate_peptide_types(sequence, angles)
def protein_analysis():
    """Controller for the protein analysis form.

    Users without a session username are redirected to the log-in page.
    When the form is accepted, the cleaned upper-cased sequence and the
    ProteinAnalysis results are stored in the session and the request is
    redirected to ``protein_analysis_output``.

    :return: ``dict(form=form)`` for the view while no valid form has been
        submitted.
    """
    # Identity comparison is the correct test for None.
    if session.username is None:
        redirect(URL(r=request, f='../account/log_in'))
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    form = FORM(TABLE(
        TR("Amino acid sequence: ",
           TEXTAREA(_type="text", _name="sequence", requires=IS_NOT_EMPTY())),
        INPUT(_type="submit", _value="SUBMIT")))
    if form.accepts(request.vars, session):
        session['sequence'] = seqClean(form.vars.sequence.upper())
        X = ProteinAnalysis(session['sequence'])
        session['aa_count'] = X.count_amino_acids()
        session['percent_aa'] = X.get_amino_acids_percent()
        session['mw'] = X.molecular_weight()
        session['aromaticity'] = X.aromaticity()
        session['instability'] = X.instability_index()
        session['flexibility'] = X.flexibility()
        session['pI'] = X.isoelectric_point()
        session['sec_struct'] = X.secondary_structure_fraction()
        redirect(URL(r=request, f='protein_analysis_output'))
    return dict(form=form)
class Peptide(PolyIon):
    """Peptide represents single protein chains in solution.

    Peptides properties are based entirely on analysis of the sequence of the
    peptide.
    """

    _state = {'name': 'Name of the peptide.',
              'sequence': 'Amino acid sequence of the peptide.'
              }

    _sequence = None
    _analysis = None

    # TODO: move h to function or constants. Unify with pitts?
    _h_max = 1
    _h_min = 2./3.
    _h = 5./6.

    def __init__(self, name=None, sequence=None):
        """Store the name and sequence and prepare the sequence analysis."""
        self._name = name
        self._sequence = sequence
        # self.sequence (no underscore) is not defined in this class; it is
        # presumably exposed by the base class from _state - TODO confirm.
        self._analysis = ProteinAnalysis(str(self.sequence))

    @property
    def molecular_weight(self):
        """Molecular weight as computed by ``SeqUtils.molecular_weight``."""
        return SeqUtils.molecular_weight(self.sequence, 'protein')

    def charge(self, pH=None, ionic_strength=None, temperature=None,
               moment=1):
        """Return the time-averaged charge of the peptide.

        :param pH: pH at which to evaluate the charge.
        :param ionic_strength: resolved through the context but not used in
            the charge calculation itself.
        :param temperature: resolved through the context but not used in the
            charge calculation itself.
        :param moment: exponent applied to the charge before returning it.
        """
        pH, ionic_strength, temperature = \
            self._resolve_context(pH, ionic_strength, temperature)

        amino_acid_count = self._analysis.count_amino_acids()

        # Copy the shared pK tables so the terminal entries added below do
        # not leak into other peptides.
        pos_pKs = dict(positive_pKs)
        neg_pKs = dict(negative_pKs)

        nterm = self.sequence[0]
        cterm = self.sequence[-1]

        if nterm in pKnterminal:
            pos_pKs['Nterm'] = pKnterminal[nterm]
        if cterm in pKcterminal:
            neg_pKs['Cterm'] = pKcterminal[cterm]

        charge = IsoelectricPoint(self.sequence,
                                  amino_acid_count)._chargeR(pH,
                                                             pos_pKs,
                                                             neg_pKs)
        return charge**moment

    def isoelectric_point(self, ionic_strength=None, temperature=None):
        """Return the isoelectric point of the peptide.

        Both parameters are accepted for interface compatibility and are
        currently ignored.
        """
        # _, ionic_strength, temperature = \
        #     self._resolve_context(None, ionic_strength, temperature)
        return self._analysis.isoelectric_point()

    def volume(self):
        """Return the approximate volume of the folded peptide in m^3."""
        v = self.molecular_weight / avogadro / self.density() / lpm3 / gpkg
        return v

    def radius(self):
        """Return the approximate radius of the folded peptide in m."""
        return (self.volume() * 3. / 4. / pi) ** (1. / 3.)

    def density(self):
        """Return the approximate density of the folded peptide in kg/L.

        Uses the empirical relation 1.410 + 0.145 * exp(-M / 13) with the
        molecular weight M expressed in kDa.
        """
        # molecular_weight is in Da (g/mol), while the correlation is defined
        # for kDa.  Without the conversion the exponential term is
        # effectively zero for every real peptide and the result is pinned
        # at 1.410.
        molecular_weight_kda = self.molecular_weight / 1000.
        return 1.410 + 0.145 * exp(-molecular_weight_kda / 13.)

    def mobility(self, pH=None, ionic_strength=None, temperature=None):
        """Return the effective mobility of the ion in m^2/V/s.

        The charge at ``pH`` times the elementary charge is divided by
        6*pi*viscosity*radius*(1 + radius/debye length) and scaled by the
        class factor ``_h``.

        :param pH: pH at which the charge is evaluated.
        :param ionic_strength: used for the Debye length of the solvent.
        :param temperature: used for the solvent viscosity and Debye length.
        """
        pH, ionic_strength, temperature = \
            self._resolve_context(pH, ionic_strength, temperature)

        # radius() recomputes the weight, density and volume on every call,
        # so evaluate it once.
        radius = self.radius()
        mobility = self.charge(pH) * elementary_charge /\
            (6 * pi * self._solvent.viscosity(temperature) * radius *
             (1 + radius /
              self._solvent.debye(ionic_strength, temperature)
              )
             ) * self._h
        return mobility
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData
import sys
import json

# Command-line helper: the first argument is a JSON object with a "Sequence"
# string and an "Options" collection naming the values to compute.  The
# selected values are printed to stdout as one JSON object.
inp = json.loads(sys.argv[1])
seq = inp["Sequence"]
X = ProteinAnalysis(seq)
data = dict()
if "MW" in inp["Options"]:
    data["MW"] = X.molecular_weight()
if "EC280" in inp["Options"]:
    aa_count = X.count_amino_acids()
    # Per-residue contributions: Y 1490 and W 5500; when "hasDisulfide" is
    # requested each C additionally contributes 62.5.
    if "hasDisulfide" in inp["Options"]:
        data["EC280"] = 1490 * aa_count["Y"] + 5500 * aa_count["W"] + 62.5 * aa_count["C"]
    else:
        data["EC280"] = 1490 * aa_count["Y"] + 5500 * aa_count["W"]
if "PI" in inp["Options"]:
    data["PI"] = X.isoelectric_point()
if "AACont" in inp["Options"]:
    ratios = X.get_amino_acids_percent()
    # Values are multiplied by 100 before being reported.
    data["AACont"] = {aa: ratios[aa] * 100. for aa in ratios}
# Function-call form works on Python 2 and 3; the print statement used
# previously is a syntax error on Python 3.
print(json.dumps(data))
#!/usr/bin/env python
import sys
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Reads protein FASTA records from stdin and writes one tab-separated row of
# ProteinAnalysis values per record to stdout.
# The seventh column holds aromaticity(); its header used to read
# "monoisotpoic", which did not describe the value actually written.
sys.stdout.write(
    "ID\tMW\tIP\tgravy\tlength\tinstability\taromaticity\tSequence\n")
for record in SeqIO.parse(sys.stdin, "fasta"):
    a = ProteinAnalysis(str(record.seq))
    properties = [
        record.id,
        a.molecular_weight(),
        a.isoelectric_point(),
        a.gravy(),
        a.length,
        a.instability_index(),
        a.aromaticity(),
        # always last column to make the output more readable
        a.sequence,
    ]
    sys.stdout.write('\t'.join(map(str, properties)) + "\n")
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.SeqUtils import ProtParamData
from Bio import SeqIO

# For every record in the sample FASTA file, print each ProteinAnalysis
# result on its own line, in a fixed order.
with open('../../samples/pdbaa') as fh:
    for rec in SeqIO.parse(fh, 'fasta'):
        myprot = ProteinAnalysis(str(rec.seq))
        # Each entry is evaluated only when its turn to be printed comes, so
        # results that precede a failing call are still emitted.
        reports = (
            myprot.count_amino_acids,
            myprot.get_amino_acids_percent,
            myprot.molecular_weight,
            myprot.aromaticity,
            myprot.instability_index,
            myprot.flexibility,
            myprot.isoelectric_point,
            myprot.secondary_structure_fraction,
            lambda: myprot.protein_scale(ProtParamData.kd, 9, .4),
        )
        for report in reports:
            print(report())
def feature_extractor(result, sequence, simplified_sequence, suffix, bipeptide=False, secondary_struct=True, side_charge=True, peptide=True, aromaticity=True, instability=True, average_h=True, side_charge_ave=True, gravy=False):
    """Add sequence features to ``result`` and return it.

    Every key written to ``result`` ends in ``_<suffix>``.  The boolean
    flags select which feature groups are computed; the isoelectric point
    and the molecular weight are always added.  Molecular weight,
    instability index and GRAVY are computed from ``simplified_sequence``,
    everything else from ``sequence``.

    :param result: mapping that receives the features (mutated in place).
    :return: the same ``result`` object.
    """
    def key(stem):
        # "<stem>_<suffix>", the naming scheme shared by all features.
        return "{}_{}".format(stem, suffix)

    if peptide:
        for acid in AMINO_ACIDS:
            result[key("{}_composition".format(acid))] = aminoacid_composition(
                sequence, acid)

    if bipeptide:
        for first in AMINO_ACIDS:
            for second in AMINO_ACIDS:
                result[key("{}{}_composition".format(first, second))] = \
                    aminoacid_composition(sequence, first + second)

    if average_h:
        result[key("hydrophobicity")] = average_hydrophobicity(sequence)
        result[key("total_positive_hydrophobicity")] = total_hydrophobicity(
            sequence, "positive")
        result[key("total_negative_hydrophobicity")] = total_hydrophobicity(
            sequence, "negative")

    if side_charge:
        if side_charge_ave:
            result[key("side_chain_charge")] = average_side_chain_charge(
                sequence)
        result[key("total_positive_charge")] = total_charge(
            sequence, "positive")
        result[key("total_negative_charge")] = total_charge(
            sequence, "negative")

    full_analysis = ProteinAnalysis(sequence)
    simplified_analysis = ProteinAnalysis(simplified_sequence)

    if secondary_struct:
        helix, turn, sheet = full_analysis.secondary_structure_fraction()
        result[key("helix")] = helix
        result[key("turn")] = turn
        result[key("sheet")] = sheet

    if aromaticity:
        result[key("aromaticity")] = full_analysis.aromaticity()

    result[key("isoelectric_point")] = full_analysis.isoelectric_point()
    result[key("molecular_weight")] = simplified_analysis.molecular_weight()

    if instability:
        result[key("instability_index")] = \
            simplified_analysis.instability_index()

    if gravy:
        result[key("gravy")] = simplified_analysis.gravy()

    return result
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO
import sys

# Print the isoelectric point of every record in the FASTA file named by the
# first command-line argument, one value per line.
# Plain 'r' replaces the Python-2-era 'rU' mode, and the context manager
# closes the handle once all records are loaded.
with open(sys.argv[1], 'r') as handle:
    records = list(SeqIO.parse(handle, "fasta"))

for record in records:
    prot = ProteinAnalysis(str(record.seq))
    # Function-call form of print works on Python 2 and 3.
    print(prot.isoelectric_point())
def feature_extraction(outdir=None):
    """Extract sequence features and write them in SVM, ARFF and CSV form.

    Reads ``<outdir>/data/data.txt`` (a table with ``sequence`` and
    ``class`` columns), removes the ``<outdir>/data`` directory, and appends
    one row per sequence to ``svm_out.txt``, ``weka_output.arff`` and
    ``tain_DL.csv`` inside ``<outdir>/output/``.

    :param outdir: working directory; defaults to the current working
        directory at call time.
    :raises IOError: when the input data file does not exist.
    """
    # Resolve the default per call; a default of os.getcwd() in the signature
    # would be frozen at import time.
    if outdir is None:
        outdir = os.getcwd()
    path_ = outdir
    data_file = os.path.join(path_, "data/data.txt")  # path to input data
    print('Reading data...')
    if not os.path.exists(data_file):
        # Previously this surfaced later as an undefined-variable error.
        raise IOError("input data file not found: " + data_file)
    data = pd.read_table(data_file)  # read the file as Pandas DataFrame
    print('Clearing existing files...')
    # NOTE: this deletes the input directory once the table is in memory.
    shutil.rmtree(path_ + "/data")
    # get the sequence and class to lists
    seq_list, cls_list = data['sequence'].tolist(), data['class'].tolist()

    pth = path_ + '/output/'
    if not os.path.exists(pth):
        os.makedirs(pth)

    p = '/config/attrib'
    filepath = pkg_resources.resource_filename(__name__, p)
    # load the pickle file with attribute names (for weka)
    # NOTE: pickle.load executes arbitrary code from the file; this is only
    # acceptable because the file is a resource shipped with the package.
    with open(filepath, "rb") as attr_file:
        attr = pickle.load(attr_file)
    with open(pth + "weka_output.arff", "a+") as wk:
        wk.write("".join('{}\n'.format(x) for x in attr))

    def format_output(aa_count, cnt, aromat, fraction, iso, mol_w, ins):
        # write the extracted feature values to arff (weka), txt (svm) and
        # csv files; class 1 is the positive class, anything else negative
        counts = list(aa_count.values())
        if cnt == 1:
            svm_label, weka_label, csv_label = "+1", "serk", str(cnt)
        else:
            svm_label, weka_label, csv_label = "-1", "loc", "0"
        a = dict(zip(it.count(), counts))
        # All three files live in the output directory.  The negative-class
        # SVM rows used to go to path_ + "svm_out.txt", i.e. a different file
        # next to the working directory.
        with open(pth + "svm_out.txt", "a+") as s:
            s.write(svm_label + " " +
                    ' '.join("{}:{}".format(k, v) for k, v in a.items()) +
                    "\n")
        with open(pth + "weka_output.arff", "a+") as w:
            w.write(' '.join("{},".format(x) for x in counts) +
                    " " + weka_label + "\n")
        with open(pth + "tain_DL.csv", "a+") as DPL:
            DPL.write(''.join("{},".format(x) for x in counts) +
                      str(round(aromat, 3)) + "," +
                      str(round(fraction[0], 3)) + "," +
                      str(round(fraction[1], 3)) + "," +
                      str(round(fraction[2], 3)) + "," +
                      str(round(iso, 3)) + "," +
                      str(mol_w) + "," + str(ins) + "," +
                      csv_label + "\n")

    for seq, cl in zip(seq_list, cls_list):  # main loop to extract features
        analysis = ProteinAnalysis(seq)  # Biopython protein analysis package
        aa_count = analysis.count_amino_acids()  # amino acid count
        aromat = analysis.aromaticity()
        fraction = analysis.secondary_structure_fraction()
        iso = analysis.isoelectric_point()
        try:
            mol_w, ins = ("%0.2f" % analysis.molecular_weight()), (
                "%0.2f" % analysis.instability_index())
        except (KeyError, ValueError):
            # Sequences with residues Biopython cannot handle get an explicit
            # missing-value marker.  The old handler silently reused the
            # previous sequence's numbers (or failed on the first row).
            mol_w, ins = "nan", "nan"
        format_output(aa_count, cl, aromat, fraction, iso, mol_w, ins)

    print("Feature extraction complete...")
    print("Extracted features are saved in" + outdir +
          "/ directory in .txt, .arff and .csv formats")
def get_isoelectric_point(self):
    """Return the isoelectric point Biopython computes for ``self.seq``."""
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    return ProteinAnalysis(self.seq).isoelectric_point()
if os.path.isfile(os.path.join(path, entry)): if entry.endswith('.pdb'): areas = get_area_classes(entry) polar_area.append(areas[0]) apolar_area.append(areas[1]) total_area.append(areas[2]) for entry in os.listdir(path): if os.path.isfile(os.path.join(path, entry)): if entry.endswith('.pdb'): for record in SeqIO.parse(entry, "pdb-atom"): sequence = str(record.seq).replace('X', 'G') protein = ProteinAnalysis(sequence) p_len.append(len(sequence)) mol_w.append(protein.molecular_weight()) iso_p.append(protein.isoelectric_point()) smell.append(protein.aromaticity()) taste_factor.append(protein.gravy()) insta_ind.append(protein.instability_index()) helter_skeler.append(protein.secondary_structure_fraction()[0]) turnip.append(protein.secondary_structure_fraction()[1]) garfield.append(protein.secondary_structure_fraction()[2]) for x in amino_acids: n = protein.count_amino_acids()[x] for y in d_count.keys(): if y[-1] == x: d_count[y].append(n) for a in amino_acids: m = protein.get_amino_acids_percent()[a] for b in d_perc.keys(): if b[-1] == a:
# Read xtal_2.csv row by row.  Column 0 is the protein id, column 1 its
# sequence and column 2 an integer label.  For every row a pcrystal object
# is built and annotated with the label, the isoelectric point (pI) and the
# molecular weight (MW) of the sequence.
with open('xtal_2.csv') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        pid = row[0]
        # Upper-case the sequence and substitute 'G' for every 'X' before
        # any analysis is run on it.
        pseq = row[1].upper().replace('X', 'G')

        xtal = pcrystal(pid, pseq)
        xtal.label = int(row[2])

        analysed_seq = ProteinAnalysis(pseq)
        xtal.pI = analysed_seq.isoelectric_point()
        xtal.MW = analysed_seq.molecular_weight()
#!/usr/bin/env python
# Calculate the molecular weight and isoelectric point for every sequence of
# a FASTA file.
# To keep the program general, the input file is chosen on the command line
# through argparse; without an argument the records are read from stdin.
import argparse
import sys

from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis

parser = argparse.ArgumentParser(
    description='Calculate mw and pi for protein sequences.')
parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                    default=sys.stdin)
args = parser.parse_args()

# Loop over the FASTA entries with SeqIO, analyse each one with
# ProteinAnalysis and print its name, molecular weight and isoelectric point.
for record in SeqIO.parse(args.infile, "fasta"):
    seq = str(record.seq)
    my_prot = ProteinAnalysis(seq)
    # Function-call form of print works on Python 2 and 3; the statement
    # form used previously is a syntax error on Python 3.
    print('{}\t {}\t {}'.format(record.id,
                                my_prot.molecular_weight(),
                                my_prot.isoelectric_point()))
def get_biopython_features(X):
    """Return a ``(len(X), 6)`` float array of Biopython features.

    Each row holds molecular weight, instability index and isoelectric
    point, followed by the values of ``secondary_structure_fraction()`` for
    the corresponding sequence in ``X``.
    """
    features = np.zeros((X.shape[0], 6))
    for row, sequence in enumerate(X):
        analysis = ProteinAnalysis(sequence)
        scalars = [
            analysis.molecular_weight(),
            analysis.instability_index(),
            analysis.isoelectric_point(),
        ]
        scalars.extend(analysis.secondary_structure_fraction())
        features[row] = np.array(scalars)
    return features
handle, "fasta", alphabet=IUPAC.protein ): start_time = time.time() record_list = record.description.split("|") # get meta data acc_code = record_list[0] organism = record_list[1] EC_code = record_list[2].replace("__", " ") species = record_list[3].replace("__", " ") note = record_list[4] # get unmodified pI seq = record.seq seq_obj = ProteinAnalysis(''.join(seq)) pi = seq_obj.isoelectric_point() count_sequences_done += 1 modifier = '0' if pi < param_dict['cutoff_pi']: category = '0' else: category = '1' # output to CSV with open(param_dict['out_CSV_pi'], 'a') as f: string = file+',' string += acc_code+',' string += organism+',' string += EC_code+',' string += species+',' string += note+',' string += '{0:.2f}'.format(pi)+','
def iso_e(protS):
    """Return the isoelectric point of the protein sequence string protS."""
    from Bio.SeqUtils.ProtParam import ProteinAnalysis
    return ProteinAnalysis(protS).isoelectric_point()
y_axis = [] x_axis = data_mwt for record in SeqIO.parse(seq_file, "fasta"): #for record in SeqIO.parse(seq_file, "fasta"): temp_seq=str(record.seq) analysis_seq=ProteinAnalysis(temp_seq) if ("ribosomal protein" in record.description or "ribosomal subunit" in record.description): #if ("ribosomal protein" in record.description or "ribosomal subunit" in record.description or "Ribosomal" in record.description): if (analysis_seq.molecular_weight() < 20000): data_mwt.append('%.2f'%(analysis_seq.molecular_weight())) y_axis.append(1) text_out.setTextColor(QColor('blue')) text_out.append(str(len(data_mwt)) + "," + record.description + "," + '%.2f'%(analysis_seq.molecular_weight()) + "," + '%.2f'%(analysis_seq.isoelectric_point())) #new=sorted(data_mwt) #data_mwt.append(list(zip(['%.2f'%(analysis_seq.molecular_weight())]))) #print(record.description + " = " + '%.2f'%(analysis_seq.molecular_weight())) csv_write = csv.writer(output) #row_wise = zip([record.description],['%.2f'%(analysis_seq.molecular_weight())],['%.2f'%(analysis_seq.isoelectric_point())]) #data_mwt.append(analysis_seq.molecular_weight()) row_wise = zip(['%.2f'%(analysis_seq.molecular_weight())],['%.2f'%(analysis_seq.isoelectric_point())]) for row in row_wise: csv_write.writerow(row) #csv_write.writerow([record.description + '%.2f'%(analysis_seq.molecular_weight())])
#!/usr/bin/env python
import sys
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Reads protein FASTA records from stdin and writes one tab-separated row of
# ProteinAnalysis values per record to stdout.
# The seventh column holds aromaticity(); its header used to read
# "monoisotpoic", which did not describe the value actually written.
sys.stdout.write("ID\tMW\tIP\tgravy\tlength\tinstability\taromaticity\tSequence\n")
for record in SeqIO.parse(sys.stdin, "fasta"):
    a = ProteinAnalysis(str(record.seq))
    properties = [
        record.id,
        a.molecular_weight(),
        a.isoelectric_point(),
        a.gravy(),
        a.length,
        a.instability_index(),
        a.aromaticity(),
        # always last column to make the output more readable
        a.sequence,
    ]
    sys.stdout.write('\t'.join(map(str, properties)) + "\n")
def calc_isoelectric_point(self) -> float:
    """
    Compute the isoelectric point of the sequence returned by ``get_seq()``,
    using Biopython's ProtParam module
    (http://biopython.org/DIST/docs/api/Bio.SeqUtils.ProtParam-pysrc.html).

    :return: the sequence's isoelectric point
    """
    return ProteinAnalysis(self.get_seq()).isoelectric_point()