def _get_feature_map(index='JOND920101'):
    """
    Build the Feature that converts sequences using one AAindex entry.

    The mapping for `index` is loaded with `get_aaindex_file`, extended so
    that '-' maps to 0.0, and then wrapped in a Feature.

    :param index: AAindex entry identifier (default: 'JOND920101').
    :return: Feature built on the extended mapping.
    """
    index_mapping = get_aaindex_file(index)
    # Give '-' a value of 0.0 on the loaded mapping before wrapping it.
    index_mapping.mapping['-'] = 0.0
    return Feature(index_mapping)
import os
import sys
# Make the package importable when the script is run from its own directory;
# this must happen before the quantiprot imports below.
sys.path.insert(0, os.path.abspath('..'))

from quantiprot.utils.io import load_fasta_file
from quantiprot.utils.feature import Feature, FeatureSet
from quantiprot.metrics.aaindex import get_aaindex_file
from quantiprot.metrics.basic import average

# Load data:
seq = load_fasta_file("data/Alphasyn.fasta")

# Build a feature: average polarity (Grantham, 1974), AAindex entry: GRAR740102:
feat = Feature(get_aaindex_file("GRAR740102")).then(average)

# Add the feature to new feature set:
fs = FeatureSet("my set")
fs.add(feat)

# Process sequences:
res_seq = fs(seq)

# Export average polarities
res = res_seq.columns()
# Parenthesised single-argument form: prints the same on Python 2 and is
# valid syntax on Python 3 (the bare `print res` statement is not).
print(res)
'avg_abs': average_absolute, 'rec': recurrence, 'det': determinism, 'pal': palindromism, 'ratio_det': ratio_determinism, 'ratio_pal': ratio_palindromism, 'entropy': entropy } args = parser.parse_args() # Retrieve AAindex mappings for the properties if and as requested # property1 if args.property1 is not None: try: aa_mapping1 = get_aaindex_file(args.property1) except ValueError: aa_mapping1 = get_aaindex_www(args.property1) try: aa_mapping1.default = float(args.default1) except (TypeError, ValueError): aa_mapping1.default = args.default1 feat1 = Feature(aa_mapping1) else: feat1 = Feature(identity) # property2 if args.property2 is not None: try: aa_mapping2 = get_aaindex_file(args.property2) except ValueError:
fastaLength.append(len(seq.data)) fastaID.append(seq.identifier) for leng in fastaLength: count += leng print(fastaLength) # Set of features fs = FeatureSet("Basic Features") # Add feature names to set of features # Hydrophobicity (Zimmerman et al., 1968) fs.add(get_aaindex_file("ZIMJ680101")) # Average flexibility indices (Bhaskaran-Ponnuswamy, 1988) fs.add(get_aaindex_file("BHAR880101")) # Hydrophilicity value (Hopp-Woods, 1981) fs.add(get_aaindex_file("HOPT810101")) # Polarity (Grantham, 1974) fs.add(get_aaindex_file("GRAR740102")) # Conformational parameter of beta-structure (Beghin-Dirkx, 1975) fs.add(get_aaindex_file("BEGF750102")) # Hydrophobicity (Jones, 1975) fs.add(get_aaindex_file("JOND750101"))
group_ngrams.add_argument('-n', '--n', default='1', help='n-gram size (default: 1)') group_ngrams.add_argument('-m', '--metric', default='identity', choices=['identity', 'taxi', 'euclid', 'sup', 'inf'], help='metric for matching n-grams (default: identity)') group_ngrams.add_argument('-r', '--radius', default=0.0, help='similarity radius (default: 0.0)') args = parser.parse_args() # Load the 'input' sequence set input_seq = load_fasta_file(args.input, unique=False) # Retrieve AAindex mapping for the 'property' if args.property is not None: try: aa_mapping = get_aaindex_file(args.property) except ValueError: aa_mapping = get_aaindex_www(args.property) # Simplify if and as requested if args.simplify is not None: aa_mapping = simplify(aa_mapping, aa_mapping.__name__+"/"+args.classes, method=args.simplify, k=int(args.classes), iters=int(args.iterations)) # Assign 'default' value for the Mapping try: aa_mapping.default = float(args.default) except (TypeError, ValueError): aa_mapping.default = args.default
# Record the length of every entry parsed from the FASTA file.
for record in SeqIO.parse("sequence_2.fasta", "fasta"):
    length_seqs.append(len(record))

# Load the sequences from the same file.
seq = load_fasta_file("sequence_2.fasta")

# Collect every sequence identifier, then slice out the part of each
# identifier that lies between the first '[' and the first ']'.
SequenceIds = list(SequenceSet.ids(seq))
SequenceIds2_list = []
for i in SequenceIds:
    bracket_open = i.find("[")
    bracket_close = i.find("]")
    SequenceIds2 = i[bracket_open + 1:bracket_close]
    SequenceIds2_list.append(SequenceIds2)

# Gather important protein features.
# AAindex-based features, each chained with `average`; the names on the left
# and the entry identifiers below are listed in the same order.
(polarity, hydropathy, iso_point, pk_COOH,
 entropy_form, melting_point, net_charge) = (
    Feature(get_aaindex_file(entry_id)).then(average)
    for entry_id in (
        "GRAR740102",  # polarity
        "KYTJ820101",  # hydropathy
        "ZIMJ680104",  # iso_point
        "JOND750102",  # pk_COOH
        "HUTJ700103",  # entropy_form
        "FASG760102",  # melting_point
        "KLEP840101",  # net_charge
    )
)

# Pattern-count features, one per motif.
glycine, RGD, GFPGER = (
    Feature(pattern_count, pattern=motif)
    for motif in ('G', 'RGD', 'GFPGER')
)

# Build the feature sets.
fs, fs1, fs2, fs3 = (
    FeatureSet(set_name)
    for set_name in ("my set", "test", "glycine", "GFPGER")
)
from quantiprot.utils.feature import Feature, FeatureSet

# Conversions-related imports:
from quantiprot.utils.mapping import simplify
from quantiprot.metrics.aaindex import get_aa2charge, get_aa2hydropathy
from quantiprot.metrics.aaindex import get_aaindex_file
from quantiprot.metrics.basic import identity

# Load the 'data/Alphasyn.fasta' sequence set, which contains several
# peptides from alpha-synuclein deposited in the Amyload database:
alphasyn_seq = load_fasta_file("data/Alphasyn.fasta")

# Retrieve predefined mapping from aminoacids to formal charge,
# and AAindex mapping to relative frequency of occurrence (entry: JOND920101)
aa2charge_map = get_aa2charge()
aa2freq_map = get_aaindex_file("JOND920101")

# NOTE: every print below uses the parenthesised single-argument form, which
# prints the same on Python 2 and is valid syntax on Python 3.
print(aa2charge_map)
print(aa2freq_map)

# Make Feature objects based on Mappings:
charge_feat = Feature(aa2charge_map)
freq_feat = Feature(aa2freq_map)
print(charge_feat)
print(freq_feat)

# And use them to convert 1st sequence in 'alphasyn_seq':
print(charge_feat(alphasyn_seq[0]))
print(freq_feat(alphasyn_seq[0]))

# Make a FeatureSet from a Feature and Mappings:
fs = FeatureSet("basic features")