Example #1
0
    def molecular_weight(self):
        """
        Compute a GC-content- and length-dependent mass term for this DNA.

        Single-nucleotide masses for A, T, C and G come from BioPython
        (g.mol^-1) and are divided by 1000 (g.mmol^-1). They are combined into
        an average pair mass weighted by ``self.gc_ratio``.

        :return: ``(1 / average pair mass) * self.len``
        """
        gc = self.gc_ratio

        # BioPython reports g.mol^-1; / 1000 -> kg.mol^-1 (SI) = g.mmol^-1
        mass = {
            base: molecular_weight(base, seq_type='DNA') / 1000
            for base in ('A', 'T', 'C', 'G')
        }
        at_pair = mass['A'] + mass['T']
        gc_pair = mass['C'] + mass['G']

        # g.mmol(bp)^-1, weighted between AT and GC pairs
        bp_mass = (1 - gc) * at_pair + gc * gc_pair

        # NOTE(review): the original unit comment reads
        # "[g.mmol(bp)^-1 * mmol(bp)/mmol(dna)] ^ -1", i.e. 1 / (bp_mass * len),
        # but operator precedence makes this (1 / bp_mass) * len. Behaviour is
        # kept as written -- confirm which one is intended.
        return (1 / bp_mass) * self.len
Example #2
0
def define_dna_weight_constraint(model, dna, dna_ggdw, gc_content,
                                 chromosome_len):
    """
    Add to ``model`` the equality constraint linking ``dna_ggdw`` to the DNA
    mass computed from GC content, chromosome length and DNA concentration.

    The constrained expression is
    ``avg_pair_mass * chromosome_len * dna.concentration - dna_ggdw`` and both
    of its bounds are set to 0.

    :param model: object exposing ``add_constraint``; also used as the hook
    :param dna: object exposing ``concentration``
    :param dna_ggdw: term subtracted from the computed DNA mass
    :param gc_content: GC fraction used to weight AT vs. GC pair masses
    :param chromosome_len: multiplier applied to the average pair mass
    """
    # BioPython reports g.mol^-1; / 1000 -> kg.mol^-1 (SI) = g.mmol^-1
    nt_mass = {
        nt: molecular_weight(nt, seq_type='DNA') / 1000
        for nt in ('A', 'T', 'C', 'G')
    }
    at_mass = nt_mass['A'] + nt_mass['T']
    cg_mass = nt_mass['C'] + nt_mass['G']

    # g.mmol(bp)^-1 * mmol(bp)/mmol(dna) * mmol(dna).gDW^-1
    avg_pair_mass = (1 - gc_content) * at_mass + gc_content * cg_mass
    tot_dna = avg_pair_mass * chromosome_len * dna.concentration

    # Enforce tot_dna == dna_ggdw by pinning the difference to [0, 0].
    model.add_constraint(
        kind=InterpolationConstraint,
        hook=model,
        id_=DNA_WEIGHT_CONS_ID,
        expr=tot_dna - dna_ggdw,
        lb=0,
        ub=0,
    )
Example #3
0
def mw_DNA(seq=None, seq_type='DNA', double_stranded=True, **kw):
    """
    Molecular weight of a sequence, or the mean single-letter weight.

    :param seq: DNA sequence; when None, the mean of the weights of "A", "T",
        "G" and "C" (each computed with the same options) is returned instead
    :param seq_type: forwarded to ``molecular_weight``
    :param double_stranded: forwarded to ``molecular_weight``
    :param kw: any further keyword arguments forwarded to ``molecular_weight``
    :return: Molecular weight, in daltons
    """
    options = dict(kw, seq_type=seq_type, double_stranded=double_stranded)

    if seq is not None:
        return molecular_weight(seq=seq, **options)

    # No sequence given: average over the four single bases.
    per_base = [molecular_weight(seq=base, **options) for base in "ATGC"]
    return np.mean(per_base)
Example #4
0
def singanlys(filenm,lmin,lmax,ctime,warn):
    """Write per-sequence statistics of a multi-FASTA file to a CSV file.

    The output file is named ``single_<filenm without dots>_<ctime>.csv`` and
    is created in the current working directory.

    Every record of ``filenm`` is passed through ``lenfilter``; for records
    whose first result element is truthy, the ID (commas replaced by spaces)
    and the molecular weight are collected. The second result element is
    passed to ``molecular_weight`` as its second positional argument
    (presumably the sequence type -- verify against ``lenfilter``). A weight
    that cannot be computed is recorded as 0.

    NOTE: rows are built from the module-level lists ``contgslen``,
    ``typesofseqs`` and ``gcs``, which must already be filled by the caller
    and be index-aligned with the sequences accepted here.

    :param filenm: path of the multi-FASTA input file
    :param lmin: forwarded to ``lenfilter``
    :param lmax: forwarded to ``lenfilter``
    :param ctime: string embedded in the output file name
    :param warn: forwarded to ``lenfilter``
    """
    from Bio.SeqUtils import molecular_weight

    singlefilenm = "single_" + filenm.replace(".", "") + "_" + ctime + ".csv"
    idsnocom = []  # IDs with commas removed, so they cannot break the CSV
    molwght = []  # molecular weight per accepted sequence (0 if unknown)

    # Context managers guarantee both files are closed even if parsing or
    # row formatting raises.
    with open(singlefilenm, "w") as outpbyseqcsv:
        outpbyseqcsv.write('N,ID,Type,Length,%GC,Mol Weight\n')

        # NOTE: "rU" is kept for Python 2 universal-newline behaviour; the
        # 'U' flag is rejected by Python >= 3.11.
        with open(filenm, "rU") as multifs:
            for indsq in SeqIO.parse(multifs, "fasta"):
                evalen = lenfilter(str(indsq.seq), str(indsq.id),
                                   lmin, lmax, warn)
                if not evalen[0]:
                    continue  # did not pass the length filter
                idsnocom.append(indsq.id.replace(",", " "))
                try:
                    molwght.append(
                        molecular_weight(str(indsq.seq), evalen[1]))
                except Exception:
                    # Best effort: keep the row, report a weight of 0.
                    molwght.append(0)

        for i in range(len(contgslen)):
            fields = [
                str(i + 1),
                str(idsnocom[i]),
                typesofseqs[i],
                str(contgslen[i]),
                "{0:.2f}".format(gcs[i]),
                "{0:.2f}".format(molwght[i]),
            ]
            outpbyseqcsv.write(','.join(fields) + '\n')

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('A .csv file has been wrote with the single sequence stats in\nyour current working directory:')
    print('('+singlefilenm+')')
    print('\nThank you!')
Example #5
0
def gfp_part_clip_mass(gfp_basicpart):
    """Return ``2.5 * MW / 1e6 * 30`` for the given part.

    ``MW`` is the BioPython molecular weight of ``gfp_basicpart.seq`` treated
    as double-stranded and circular; 30 is the fixed clip volume.
    """
    from Bio.SeqUtils import molecular_weight

    clip_volume = 30
    part_mw = molecular_weight(
        gfp_basicpart.seq, double_stranded=True, circular=True)
    return 2.5 * part_mw / 1e6 * clip_volume
Example #6
0
    def molecular_weight(self):
        """Calculate the molecular weight from sequence, material and shape.

        The sequence type is the first of 'DNA', 'RNA', 'protein' found in
        ``self.material``; the molecule is treated as double stranded when
        ``self.material`` contains 'ds', and as circular when ``self.shape``
        equals 'circular'. The result is stored in ``self._molecular_weight``.

        :return: plain ``0`` for an empty sequence (nothing is cached in that
            case), otherwise a ``Unit`` built from ``'<mw> g/mol'``
        :raises ValueError: if ``self.material`` names none of DNA, RNA or
            protein (previously this surfaced as an UnboundLocalError)
        """
        if len(self.sequence) == 0:
            return 0

        # Pick the sequence type; order matters and mirrors DNA > RNA > protein.
        for candidate in ('DNA', 'RNA', 'protein'):
            if candidate in self.material:
                seq_type = candidate
                break
        else:
            raise ValueError(
                "cannot infer sequence type from material {!r}; expected it "
                "to contain 'DNA', 'RNA' or 'protein'".format(self.material))

        double_stranded = 'ds' in self.material
        circular = (self.shape == 'circular')

        mw = molecular_weight(self.sequence,
                              seq_type=seq_type,
                              double_stranded=double_stranded,
                              circular=circular)

        # Cache and return the value with its unit attached.
        self._molecular_weight = Unit('{} g/mol'.format(mw))
        return self._molecular_weight
Example #7
0
def sequence_weight(sequence):
    """Return weight in Daltons.

    'X' residues are removed before the protein weight is computed and are
    then added back at an estimated 110 each.
    """
    unknown_residues = sequence.count('X')
    known_weight = molecular_weight(sequence.replace('X', ''),
                                    seq_type='protein')
    return known_weight + 110 * unknown_residues  # 110 per 'X' is an estimate
Example #8
0
def DetectorRangeCoverage(arr, lower, upper):
    """Return the percentage of peptides whose weight lies in [lower, upper].

    :param arr: sized collection of peptide objects; each must provide
        ``toString()`` returning UTF-8 encoded bytes of the sequence
    :param lower: inclusive lower weight bound
    :param upper: inclusive upper weight bound
    :return: ``100 * matching / len(arr)``; 0.0 when ``arr`` is empty (this
        used to raise ZeroDivisionError)
    """
    # len() rather than truthiness: arr may be an array-like whose truth
    # value is ambiguous.
    if len(arr) == 0:
        return 0.0

    count = 0
    for pep in arr:
        mw = molecular_weight(pep.toString().decode("utf-8"), 'protein')
        if lower <= mw <= upper:
            count += 1
    return 100 * count / len(arr)
Example #9
0
    def _calc_mw(self):
        """Return the molecular weight, preferring the sequence over the length.

        First the weight is computed from ``self.seq`` with BioPython; if any
        step of that raises ``QueryError``, ``mw_from_length`` is tried with
        ``self.length``. If that also raises ``QueryError``, a ``QueryError``
        asking for a sequence or length is raised.
        """
        from Bio.SeqUtils import molecular_weight

        # Attempt 1: exact weight from the sequence.
        try:
            weight = molecular_weight(
                seq=self.seq,
                seq_type=self.molecule,
                double_stranded=self.is_double_stranded,
                circular=self.is_circular,
            )
            # Biopython assumes a 5' phosphate, so remove one HPO3 per free
            # 5' end when the molecule is not phosphorylated.
            if not self.is_phosphorylated_5:
                if self.is_double_stranded:
                    strand_count = 2
                else:
                    strand_count = 1
                if self.is_circular:
                    free_ends = 0
                else:
                    free_ends = strand_count
                phosphate_mass = 1.008 + 30.974 + 3 * 15.999
                weight -= phosphate_mass * free_ends

            return weight

        except QueryError:
            pass

        # Attempt 2: estimate from the length and molecule type.
        try:
            self._cache_stranded_molecule()
            molecule_key = (self._molecule, self._strandedness)
            return mw_from_length(self.length, molecule_key)

        except QueryError:
            pass

        raise QueryError(
            "need sequence or length to calculate molecular weight")
 def test_get_molecular_weight_identical(self):
     """ProteinAnalysis.molecular_weight() must agree with Bio.SeqUtils.molecular_weight()."""
     # Little more than a smoke test: per the original author's note,
     # ProteinAnalysis.molecular_weight calls SeqUtils.molecular_weight
     # internally.
     from_analysis = self.analysis.molecular_weight()
     reference_seq = Seq(self.seq_text)
     from_sequtils = molecular_weight(reference_seq, seq_type="protein")
     self.assertAlmostEqual(from_analysis, from_sequtils)
Example #11
0
 def molecular_weight(self):
     """Return the override weight if one is set, else the computed one.

     A falsy ``_molecular_weight_override`` counts as "not set"; the weight
     is then the BioPython protein weight of ``self.peptide`` divided by 1000.
     """
     override = self._molecular_weight_override
     if override:
         return override
     # g.mol^-1 -> kg.mol^-1 (SI) = g.mmol^-1
     return molecular_weight(self.peptide, seq_type='protein') / 1000
Example #12
0
 def test_get_monoisotopic_molecular_weight_identical(self):
     """Monoisotopic weight from ProteinAnalysis must agree with Bio.SeqUtils."""
     # Rebuild the analysis in monoisotopic mode; it replaces self.analysis.
     self.analysis = ProtParam.ProteinAnalysis(
         self.seq_text, monoisotopic=True)
     from_analysis = self.analysis.molecular_weight()
     reference_seq = Seq(self.seq_text, IUPAC.protein)
     from_sequtils = molecular_weight(reference_seq, monoisotopic=True)
     self.assertAlmostEqual(from_analysis, from_sequtils)
Example #13
0
 def molecular_weight(self):
     """Return the override weight if one is set, else the computed one.

     A falsy ``_molecular_weight_override`` counts as "not set"; the weight
     is then the BioPython RNA weight of ``self.rna`` divided by 1000.
     """
     override = self._molecular_weight_override
     if override:
         return override
     # g.mol^-1 -> kg.mol^-1 (SI) = g.mmol^-1
     return molecular_weight(self.rna, seq_type='RNA') / 1000
def calc_mass(protein_string):
    """Return the monoisotopic mass of a protein string, minus one water.

    One water molecule (18.01056) is subtracted because the peptide is
    considered to be excised from the middle of a protein.
    """
    water_mass = 18.01056
    intact_mass = molecular_weight(protein_string, "protein",
                                   monoisotopic=True)
    return intact_mass - water_mass
Example #15
0
    def calculate_mw(fname):
        """Sum the protein weights of the distinct chains in a FASTA file.

        Duplicate sequences are counted once, every "X" is replaced by "A"
        before weighing, and each chain weight is rounded to 2 decimals
        before being added to the total.
        """
        records = list(SeqIO.parse(fname, "fasta"))
        unique_chains = list(set([str(record.seq) for record in records]))
        return sum(
            (round(molecular_weight(chain.replace("X", "A"), "protein"), 2)
             for chain in unique_chains),
            0.0,
        )
Example #16
0
File: dna.py Project: anushchp/etfl
    def molecular_weight(self):
        """
        Compute a GC-content- and length-dependent mass term for this DNA.

        Single-nucleotide masses for A, T, C and G come from BioPython
        (g.mol^-1) and are divided by 1000 (g.mmol^-1), then averaged into a
        pair mass weighted by ``self.gc_ratio``.

        :return: ``(1 / average pair mass) * self.len``
        """
        gc_fraction = self.gc_ratio

        # BioPython reports g.mol^-1; / 1000 -> kg.mol^-1 (SI) = g.mmol^-1
        nt_mass = {
            nt: molecular_weight(nt, seq_type='DNA') / 1000
            for nt in ('A', 'T', 'C', 'G')
        }
        at_mass = nt_mass['A'] + nt_mass['T']
        cg_mass = nt_mass['C'] + nt_mass['G']

        # g.mmol(bp)^-1, weighted between AT and GC pairs
        pair_mass = (1 - gc_fraction) * at_mass + gc_fraction * cg_mass

        # NOTE(review): the original unit comment describes
        # [pair_mass * len] ^ -1, but precedence yields (1 / pair_mass) * len.
        # Behaviour is kept as written -- confirm which one is intended.
        return (1 / pair_mass) * self.len
Example #17
0
def add_dummy_peptide(model, aa_ratios, dummy_gene, peptide_length):
    """
    Create the 'dummy_peptide' Peptide, attach it to ``model`` and return it.

    Its molecular weight is ``peptide_length`` times the ratio-weighted sum
    of the single amino-acid protein weights, divided by 1000.

    :param model: model whose ``peptides`` collection receives the peptide
    :param aa_ratios: mapping of amino-acid letter -> ratio
    :param dummy_gene: object whose ``id`` becomes the peptide's ``gene_id``
    :param peptide_length: multiplier applied to the weighted average weight
    :return: the newly created peptide
    """
    dummy_peptide = Peptide(id='dummy_peptide',
                            name='Dummy peptide',
                            gene_id=dummy_gene.id)

    weighted_aa_mass = sum(
        [ratio * molecular_weight(aa, 'protein')
         for aa, ratio in aa_ratios.items()])
    # g.mol^-1 -> kg.mol^-1 (SI) = g.mmol^-1
    dummy_peptide.molecular_weight = peptide_length * weighted_aa_mass / 1000

    dummy_peptide._model = model
    model.peptides += [dummy_peptide]
    return dummy_peptide
Example #18
0
    def _extract_protein_data(object):
        """Collect translation frames, amino-acid counts and weights.

        The result holds the upper-cased protein as 'frame1', its weight
        (stop symbols '*' removed) as 'molecular_weight_f1' and, under
        'aa_count', the number of occurrences of each of the letters in
        "FLSYCWPHQRIMTNKVADEG*". Frames 2 and 3 (the coding DNA shifted by
        one and two bases) and their weights are added on a best-effort
        basis: whatever was computed before a failure is kept.

        :param object: item exposing ``protein``, ``coding_dna`` and
            ``translation_table`` (the name shadows the builtin but is kept
            so that keyword callers keep working)
        :return: the dict described above, or None if the frame-1 data could
            not be built (the error is printed)
        """
        try:
            from Bio.SeqUtils import molecular_weight
            from Bio.Seq import Seq

            protein_data = {
                'frame1':
                str(object.protein).upper(),
                'aa_count': {},
                'molecular_weight_f1':
                molecular_weight(object.protein.upper().replace("*", ""),
                                 seq_type="protein"),
            }

            for aa in "FLSYCWPHQRIMTNKVADEG*":
                protein_data['aa_count'][aa] = object.protein.count(aa)

            try:
                new_sequence = Seq(object.coding_dna)
                # (offset into the coding DNA, frame number)
                for offset, frame in ((1, 2), (2, 3)):
                    translated = str(new_sequence[offset:].translate(
                        object.translation_table))
                    protein_data[f"frame{frame}"] = translated
                    protein_data[f"molecular_weight_f{frame}"] = (
                        molecular_weight(translated.replace("*", ""),
                                         seq_type="protein"))

            except Exception:
                # Alternative frames are optional. Catching Exception rather
                # than using a bare except lets KeyboardInterrupt and
                # SystemExit propagate.
                pass

        except Exception as e:
            print(e)
            protein_data = None

        return protein_data
Example #19
0
def test_new_part_resuspension(gfp_orf_basicpart):
    """Check that resuspending 750 mass units of the part gives 75.

    The volume comes from ``bsb.new_part_resuspension``; the concentration is
    recomputed here from the BioPython double-stranded molecular weight and
    must round to 75.
    """
    from Bio.SeqUtils import molecular_weight

    seq = gfp_orf_basicpart.seq
    print(f"length of basicpart: {len(seq)}")
    # len(seq) * 660 equals len(seq * 660) without building a 660-fold copy.
    print(f"estimated MW: {len(seq) * 660}")

    # Compute the weight once and reuse it for the report and the assertion.
    mw = molecular_weight(seq, double_stranded=True)
    print(f"biopython MW: {mw}")

    mass = 750
    vol = bsb.new_part_resuspension(part=gfp_orf_basicpart, mass=mass)
    print(f"Calculated volume of resuspension buffer: {vol}")

    concentration = mass * 1e-9 / (vol * 1e-6 * mw) * 1e9
    print(f"estimated concentration: {concentration}")
    assert 75 == round(concentration)
Example #20
0
def add_dummy_mrna(model, dummy_gene, mrna_kdeg, mrna_length, nt_ratios):
    """
    Create a dummy mRNA with its transcription and degradation reactions,
    register everything on ``model`` and return the mRNA.

    :param model: model providing ``essentials``, ``rnap``,
        ``rna_nucleotides``, ``rna_nucleotides_mp`` and the ``add_*`` helpers
        used below
    :param dummy_gene: object whose ``id`` is used as ``gene_id``
    :param mrna_kdeg: passed to the mRNA as ``kdeg``
    :param mrna_length: length used to scale weight and stoichiometries
    :param nt_ratios: mapping of nucleotide letter -> ratio
    :return: the created dummy mRNA
    """
    # Metabolites needed for the stoichiometries below
    h2o = model.essentials['h2o']
    h = model.essentials['h']
    ppi = model.essentials['ppi']

    # Create a dummy mRNA
    # NOTE(review): the id is 'dummy_gene' although the name is 'dummy mRNA'
    # -- confirm this is intended.
    dummy_mrna = mRNA(id='dummy_gene',
                      name='dummy mRNA',
                      kdeg=mrna_kdeg,
                      gene_id=dummy_gene.id)
    # Ratio-weighted single-nucleotide RNA weights
    nt_weights = [v * molecular_weight(k, 'RNA') for k, v in nt_ratios.items()]
    dummy_mrna.molecular_weight = mrna_length * sum(
        nt_weights) / 1000  # g.mol^-1 -> kg.mol^-1 (SI) = g.mmol^-1
    model.add_mrnas([dummy_mrna], add_degradation=False)
    # Transcription reaction for the dummy mRNA, using every RNAP in the model
    dummy_transcription = TranscriptionReaction(
        id=model._get_transcription_name(dummy_mrna.id),
        name='Dummy Transcription',
        gene_id=dummy_gene.id,
        enzymes=model.rnap.values(),
        scaled=True)
    model.add_reactions([dummy_transcription])
    model.transcription_reactions += [dummy_transcription]
    # Use the input ratios to make the stoichiometry
    transcription_mets = {
        model.metabolites.get_by_id(model.rna_nucleotides[k]):
        -1 * v * mrna_length
        for k, v in nt_ratios.items()
    }
    # ppi gets a positive coefficient of mrna_length
    transcription_mets[ppi] = mrna_length
    dummy_transcription.add_metabolites(transcription_mets, rescale=True)
    # Add the degradation
    # Same ratio * length coefficients, keyed by the rna_nucleotides_mp
    # metabolites
    mrna_deg_stoich = {
        model.metabolites.get_by_id(model.rna_nucleotides_mp[k]):
        -1 * v * mrna_length
        for k, v in nt_ratios.items()
    }
    mrna_deg_stoich[h2o] = -1 * mrna_length
    mrna_deg_stoich[h] = 1 * mrna_length
    model._make_degradation_reaction(deg_stoich=mrna_deg_stoich,
                                     macromolecule=dummy_mrna,
                                     kind=mRNADegradation,
                                     scaled=True)
    model.add_mass_balance_constraint(dummy_transcription, dummy_mrna)

    return dummy_mrna
    def molecular_weight(self, record):
        '''
        Estimate the molecular weight of the protein in a SeqRecord.

        Characters of the sequence that are not in ``self.amino_acids`` are
        removed before the weight is computed; each removed character is then
        credited with the mean per-residue weight of the cleaned sequence.

        Input:
            - record: a SeqRecord
        Output:
            - float: representing the molecular weight of the protein
        '''
        raw_sequence = str(record.seq)
        analysis = ProteinAnalysis(raw_sequence)

        # Number of characters that are not recognised amino acids
        unknown_total = sum(
            [occurrences
             for residue, occurrences in Counter(raw_sequence).items()
             if residue not in self.amino_acids])

        known_residues = ''.join(
            residue for residue in raw_sequence
            if residue in self.amino_acids)
        cleaned_seq = Seq(known_residues, IUPAC.protein)

        known_weight = molecular_weight(seq=cleaned_seq,
                                        monoisotopic=analysis.monoisotopic)
        mean_residue_weight = known_weight / float(len(cleaned_seq))

        return known_weight + unknown_total * mean_residue_weight
Example #22
0
 def test_get_molecular_weight_identical(self):
     """ProteinAnalysis.molecular_weight() must agree with Bio.SeqUtils.molecular_weight()."""
     from_analysis = self.analysis.molecular_weight()
     reference_seq = Seq(self.seq_text, IUPAC.protein)
     from_sequtils = molecular_weight(reference_seq)
     self.assertAlmostEqual(from_analysis, from_sequtils)
Example #23
0
        seq = ""
        for exon in allExons:
            useq = exon.sequence(myFasta, use_strand=False)
            seq += useq

        seq = seq.upper()

        if t.strand == "-":
            seq = Seq(seq).reverse_complement()

        else:
            seq = Seq(seq)

        try:
            aaSeq = seq.translate(stop_symbol="")
            aaWeight = molecular_weight(aaSeq, seq_type="protein")
            aaWeightMono = molecular_weight(aaSeq,
                                            seq_type="protein",
                                            monoisotopic=True)

            # protein_id	gene_symbol	mol_weight_kd	mol_weight
            print("{}\t{}\t{}\t{}\tnormal".format(geneName.upper(), geneName,
                                                  aaWeight / 1000, aaWeight),
                  file=sys.stdout)
            print("{}\t{}\t{}\t{}\tmonoisotopic".format(
                geneName.upper(), geneName, aaWeightMono / 1000, aaWeightMono),
                  file=sys.stdout)
        except:
            print("error", file=sys.stderr)
            print(transcriptID, geneName, file=sys.stderr)
            print(seq, file=sys.stderr)
Example #24
0
 def molecular_weight(self):
     """Return the molecular weight of ``self.sequence``.

     ``self.monoisotopic`` is forwarded to Bio.SeqUtils.molecular_weight.
     """
     sequence = self.sequence
     use_monoisotopic = self.monoisotopic
     return molecular_weight(sequence, monoisotopic=use_monoisotopic)
Example #25
0
 def calc_molecular_weight(self) -> float:
     """
     Compute the protein molecular weight of the sequence from ``get_seq()``.

     :return: protein seq molecular weight, float
     """
     protein_seq = self.get_seq()
     return molecular_weight(protein_seq, seq_type='protein')
Example #26
0
				locusTag = str(feature.qualifiers["locus_tag"]).strip("[']")
				
				# count number of CXXCH motifs found in each protein coding sequence
				motifCount = len(hemeBindingMotifs)
				
				# cast translated amino acid sequence as string with [' AASEQ '] characters removed
				AAseq = str(feature.qualifiers["translation"]).strip("[']")
				
				# determine length of amino acid sequence **[' AASEQ '] characters must be removed for an accurate count!
				AAlength = len(str(feature.qualifiers["translation"]).strip("[']"))
				
				# if no ambiguous AAs present, calculate each cytochrome's molecular weight
				AmbiguousAA = re.findall('[BXZJUO]', str(feature.qualifiers["translation"]))
				if not AmbiguousAA:
				
					MolecularWeight = molecular_weight(AAseq, "protein")
				
				# calculate heme density as number of hemes per kDa
				HemeDensity = (float(motifCount) / MolecularWeight) * 1000
		
				if multihemeCytochromes:
					
					# add gene name to FASTA definition line if present in CDS feature qualifiers
					if 'gene' in feature.qualifiers:
					
						# define GeneName variable
						GeneName = str(feature.qualifiers["gene"]).strip("[']")
						
						# define output string in FASTA format if cytochromes were predicted
						OutputString = "%s\t%s\t%s\t%i\t%i\t%i\t%i\t%1.2f\t%1.3f\t%s\t%s\t%s" % (locusTag, str(feature.qualifiers["product"]).strip("[']"), GeneName, motifCount, len(CXXCHmotifs), len(CXXXCHmotifs), AAlength, float(MolecularWeight / 1000), HemeDensity, OrganismName.replace("_", " ").replace("sp ", "sp. "), RecordName, AAseq)
				
Example #27
0
 def calc_molecular_weight(self) -> float:
     """
     Compute the protein molecular weight of the sequence from ``get_seq()``.

     :return: protein seq molecular weight, float
     """
     sequence = self.get_seq()
     weight = molecular_weight(sequence, seq_type='protein')
     return weight
Example #28
0
from Bio.Seq import Seq
from Bio.Alphabet import generic_protein
from Bio.SeqUtils import molecular_weight

# Read input.txt; the last line (without its trailing newline) is the protein.
with open('input.txt', 'r') as handle:
    for raw_line in handle:
        protein_seq = raw_line.strip('\n')

# Monoisotopic mass minus one water molecule (18.01056), 3 decimals.
peptide = Seq(protein_seq, generic_protein)
print('%0.3f' % (molecular_weight(peptide, monoisotopic=True) - 18.01056))
Example #29
0
 def test_get_monoisotopic_molecular_weight_identical(self):
     """Monoisotopic weight from ProteinAnalysis must agree with Bio.SeqUtils."""
     # Rebuild the analysis in monoisotopic mode; it replaces self.analysis.
     self.analysis = ProtParam.ProteinAnalysis(
         self.seq_text, monoisotopic=True)
     observed = self.analysis.molecular_weight()
     reference_seq = Seq(self.seq_text, IUPAC.protein)
     expected = molecular_weight(reference_seq, monoisotopic=True)
     self.assertAlmostEqual(observed, expected)
Example #30
0
 def test_get_molecular_weight_identical(self):
     """ProteinAnalysis.molecular_weight() must agree with Bio.SeqUtils.molecular_weight()."""
     observed = self.analysis.molecular_weight()
     reference_seq = Seq(self.seq_text, IUPAC.protein)
     expected = molecular_weight(reference_seq)
     self.assertAlmostEqual(observed, expected)
from Bio.SeqUtils import molecular_weight
import pyperclip

# Take a protein sequence from the clipboard, compute its molecular weight
# (3 decimals), put the formatted value back on the clipboard and print it.
x = pyperclip.paste()
protein_weight = molecular_weight(x, "protein")
result = "%0.3f" % protein_weight
pyperclip.copy(result)
print(result)
            mono_index = i + steps[0]

            mono_M = float(file[mono_index].split(">")[1].split("<")[0])

            mod = file[i + 1:PTM_end]

            sorted_seq = ''.join(sorted(seq))

            t = cono(seq, mono_M, mod)

            #print("Seq: {0}, mono_mass: {1}, PTM: {2}".format(seq, mono_M, mod))

            cyc_num = seq.count("C")

            calculated_mass = molecular_weight(
                seq, "protein", monoisotopic=True) - 2 * int(cyc_num / 2)

            out = "Seq: {0}, mono_mass: {1}, calculated_mass: {2}, diff: {3}, PTM: {4}".format(
                seq, mono_M, calculated_mass, calculated_mass - mono_M, mod)

            #only pull out th Carboxylic E, Hydro-proline, Bromide-W modification
            screen = [(("Gla" in i) or ("O" in i) or ("BTr" in i))
                      for i in mod]

            if np.all(np.array(screen) == 1) and sorted_seq not in seq_lib:
                print(out + "\n")

                output.write("####seq{0}####\n".format(s))
                output.write(out + "\n")
                cono_server.append(t)
Example #33
0
 def molecular_weight(self):
     """Return the override weight if one is set, else the computed one.

     A falsy ``_molecular_weight_override`` counts as "not set"; the weight
     is then the BioPython weight of ``self.rna`` divided by 1000.

     ``seq_type='RNA'`` is passed explicitly: BioPython's default sequence
     type is DNA, which does not accept 'U'.
     """
     if self._molecular_weight_override:
         return self._molecular_weight_override
     # g.mol^-1 -> kg.mol^-1 (SI) = g.mmol^-1
     return molecular_weight(self.rna, seq_type='RNA') / 1000
Example #34
0
 def mw(self):
     """Return the protein molecular weight of ``self.seq``."""
     sequence = self.seq
     return molecular_weight(sequence, 'protein')
Example #35
0
# Demo script (Python 2 print statements): basic Seq operations, GC content
# and molecular weight, then alphabet-aware operations on a second sequence.
from Bio.Seq import Seq
my_seq = Seq("AGTACACTGGT")
print my_seq
print my_seq[10]  # single element at index 10
print my_seq[1:5]  # slice from index 1 up to, not including, 5
print len(my_seq)
print my_seq.count( "A" )  # occurrences of "A"

# Sequence statistics from Bio.SeqUtils
from Bio.SeqUtils import GC, molecular_weight
print "GC: ", GC( my_seq )
print molecular_weight( my_seq )

# A second sequence, created with an explicit IUPAC alphabet
from Bio.Alphabet import IUPAC
my_dna = Seq("AGTACATGACTGGTTTAG", IUPAC.unambiguous_dna)
print my_dna
print  # prints an empty line
print my_dna.alphabet
print my_dna.reverse_complement()

print my_dna.translate()

def main(*args, **kwargs):
    """Print the monoisotopic protein mass of the sequence in a file.

    The last positional argument is a file name relative to the current
    working directory. The file is parsed with ``StrongHold.parserDNAFile``
    and the result, converted to ``str``, is weighed as a circular protein
    sequence. The mass is printed with 3 decimals. ``kwargs`` is unused.
    """
    input_path = os.path.join(os.getcwd(), args[-1])
    sequence = str(StrongHold.parserDNAFile(input_path))
    mass = molecular_weight(sequence,
                            seq_type='protein',
                            circular=True,
                            monoisotopic=True)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('%.3f' % mass)
Example #37
0
 def molecular_weight(self):
     """Return the molecular weight of ``self.sequence``.

     ``self.monoisotopic`` is forwarded to Bio.SeqUtils.molecular_weight.
     """
     protein_sequence = self.sequence
     monoisotopic = self.monoisotopic
     return molecular_weight(protein_sequence, monoisotopic=monoisotopic)
import pandas as pd
import numpy as np
from Bio.SeqUtils import molecular_weight
from Bio import SeqIO
from collections import Counter
    
    
AA_LETTERS = sorted("ACEDGFIHKMLNQPSRTWVY")

record = SeqIO.read('../supporting_data/U00096.gb', "gb")

# Collect the qualifiers of every CDS feature, adding the molecular weight of
# its translation, then write them as a tab-separated table.
# The dead `out = pd.DataFrame(columns=...)` that was immediately overwritten
# has been removed.
i = 0
out = {}
for r in record.features:
    if r.type != 'CDS':
        continue
    data = r.qualifiers
    # i counts every CDS, including those skipped below, so row labels may
    # have gaps.
    i += 1
    try:
        # NOTE: this adds the key to the feature's own qualifiers mapping.
        data['molecular_weight[Da]'] = molecular_weight(data['translation'][0], seq_type='protein')
    except KeyError:
        # CDS without a 'translation' qualifier: leave it out.
        continue
    out[i] = data

# One row per CDS, one column per qualifier
out = pd.DataFrame.from_dict(out).T
out.to_csv('ecoli_genome_info.tsv', sep='\t')

    #schmidtMW = schmidt['MW [kDa]']
#peeboMW = peebo['MW [kDa]']