def main(input_mp4, output_mat, noise, speaker, num_features, num_frames,
         verbose, fps, log_spec):
    my_features = partial(features, frames=num_frames, features=num_features,
                          fps=fps, use_mfcc=not log_spec)
    wav_to_features = compose(my_features, wav_read)
    mp4_to_features = compose(my_features, to_wav)
    train_classifiers2 = partial(train_classifiers, [noise] + list(speaker),
                                 wav_to_features, LinearSVC, verbose=verbose)
    # Materialize the results: a bare map() iterator would be exhausted by
    # the verbose loop below before max() could run over it.
    results = list(map(train_classifiers2,
                       [AdaBoostClassifier, GaussianNB, LinearSVC,
                        partial(KNeighborsClassifier, n_neighbors=20)]))
    if verbose:
        for name, (_, score) in zip(["Ada", "GNB", "LSVM", "NN"], results):
            print(name, score)
    classify, _ = max(results, key=lambda x: (x[1][0] + x[1][1]) / 2)
    mp4_features = mp4_to_features(input_mp4).T
    if verbose:
        savemat("all_features.mat", {"all_features": mp4_features})
    voice, person = classify(mp4_features)
    savemat(output_mat, {"voice": voice, "person": person})
def initializeDesktop(self, tk: Tkinter):
    compose(
        self.setTextArea,
        self.setButtons,
        self.setAppWindowSize,
        self.setAppTitle,
    )(tk)
def collect_git_projects(workspace):
    '''Collect the git projects in `workspace`'''
    wqueue = queue.Queue()
    compose(lambda projects: list(wqueue.put(x) for x in projects),
            partial(filter_project_folders, EXCLUDES),
            get_git_projects)(workspace)
    return wqueue
def get_arch_recipes(arch: str, dag: networkx.DiGraph):
    """Get all recipes compatible with the desired build architecture"""
    return filter(
        funcy.compose(funcy.rpartial(operator.__contains__, arch),
                      funcy.rpartial(getattr, 'architectures')),
        filter(
            funcy.compose(functools.partial(operator.__eq__, 0),
                          dag.in_degree),
            dag.nodes))
def executeQuery(self, query, data=None, debug=False, commit=True):
    basicCompose = compose(self._doQuery)
    if debug:
        basicCompose = compose(self._debug, basicCompose)
    if commit:
        basicCompose = compose(self.commitChanges, basicCompose)
    return basicCompose(query, data)
def main(root):
    pluck_paths = compose(lambda x: join_paths(*x), itemgetter(0, 2))
    files = fnmatch.filter(imapcat(pluck_paths, os.walk(root)), '*.d.ts')
    for fpath in files:
        with open(fpath) as fp:
            parse(fp)
def selector(css_class: str) -> Callable:
    return compose(
        partial(lmap, str.strip),
        partial(select, notnone),
        partial(lmap, attrgetter("text")),
        partial(HtmlElement.cssselect, expr=css_class),
    )
def binsearch(r, oracle, eps=EPS, find_lambda=False):
    """Binary search over the diagonal of the rectangle.

    Returns the lower and upper approximation on the diagonal.
    """
    f = diagonal_convex_comb(r)
    feval = fn.compose(oracle, f)
    lo, hi = 0, 1

    # Early termination via bounds checks
    if feval(lo):
        result_type = SearchResultType.TRIVIALLY_TRUE
        hi = 0
    elif not feval(hi):
        result_type = SearchResultType.TRIVIALLY_FALSE
    else:
        result_type = SearchResultType.NON_TRIVIAL
        mid = lo
        while hi - lo > eps:
            mid = lo + (hi - lo) / 2
            lo, hi = (lo, mid) if feval(mid) else (mid, hi)

    if find_lambda:
        if result_type == SearchResultType.TRIVIALLY_TRUE:
            return result_type, -1
        elif result_type == SearchResultType.TRIVIALLY_FALSE:
            return result_type, 2
        return result_type, (lo + hi) / 2
    else:
        return result_type, mdtr.to_rec(zip(f(lo), f(hi)))
def transform(source_root: str, *directories: Iterable[str]) -> List[PykeTransform]:
    def transform_collect(directory: str):
        if source_root == directory:
            rest = []
        else:
            rest = map(
                transform_collect,
                filter(os.path.isdir, [
                    f'{directory}/{dirent}' for dirent in os.listdir(directory)
                ]))
        if os.path.isfile(f'{directory}/Pykefile'):
            with open(f'{directory}/Pykefile') as pykefile:
                return itertools.chain(
                    rest,
                    map(
                        lambda conf: PykeTransform(**conf,
                                                   source_root=source_root,
                                                   local_root=directory),
                        ast.literal_eval(pykefile.read())))
        return rest

    return [
        *transform_flatten(
            map(
                funcy.compose(transform_flatten, transform_collect),
                filter(
                    os.path.isdir,
                    map(
                        functools.partial(operator.__concat__,
                                          f'{source_root}/'),
                        directories)))),
        *transform_flatten(transform_collect(source_root))
    ]
def update_account_ops(mongo, username):
    """Fetch the entire account history, and back-fill any missing ops."""
    for event in Account(username).history():
        with suppress(DuplicateKeyError):
            transform = compose(strip_dot_from_keys, remove_body,
                                json_expand, typify)
            mongo.AccountOperations.insert_one(transform(event))
def get_all_biobox_paths(config):
    """
    Returns all paths listed in the biobox file
    """
    f = funcy.compose(
        partial(funcy.pluck, 'value'),
        funcy.flatten,
        partial(funcy.mapcat, funcy.itervalues))
    return list(f(config))
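# A toy illustration of the pipeline above (the sample config is made up, and
# funcy.itervalues mirrors the snippet's own import -- newer funcy releases
# may expose this as plain dict.values):
import funcy
from functools import partial

sample_config = [{'section': [{'value': '/a'}, {'value': '/b'}]},
                 {'section': [{'value': '/c'}]}]
f = funcy.compose(
    partial(funcy.pluck, 'value'),   # 3. pull each entry's 'value' field
    funcy.flatten,                   # 2. flatten the nested entry lists
    partial(funcy.mapcat, funcy.itervalues))  # 1. concat each section's values
assert list(f(sample_config)) == ['/a', '/b', '/c']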
def spellCheckDocument(cls, text: str) -> str:
    """
    - split the document into a list of words
    - for each word, look up its matchers from the db
    - if found, go to the next word; otherwise:
        - send the word and its matcher list to the spell-check matcher
        - replace the word in the list with its correction
    - at the end, join the list with spaces and return the corrected text,
      along with the array of mistaken words and their corrections
    """
    this = cls()
    # compose runs the functions given to it from right to left
    applierFunc = compose(
        lambda textList: [this.correctWord(word) for word in textList],
        this.textToList)
    applierFunc(text)  # correct the words; results collect on `this`
    return this.listToDocument(this.correctedTextList)
def scrape_operations(mongo):
    """Fetch all operations (including virtual) from last known block forward."""
    indexer = Indexer(mongo)
    last_block = indexer.get_checkpoint('operations')
    log.info('\n> Fetching operations, starting with block %d...' % last_block)

    blockchain = Blockchain(mode="irreversible")
    history = blockchain.history(start_block=last_block)
    for operation in history:
        # insert operation
        with suppress(DuplicateKeyError):
            transform = compose(strip_dot_from_keys, json_expand, typify)
            mongo.Operations.insert_one(transform(operation))

        # if this is a new block, checkpoint it, and schedule batch processing
        if operation['block_num'] != last_block:
            last_block = operation['block_num']
            indexer.set_checkpoint('operations', last_block - 1)

            if last_block % 10 == 0:
                log.info("Checkpoint: %s (%s)" % (
                    last_block,
                    blockchain.steem.hostname
                ))
def validate_operations(mongo):
    """Scan the latest N blocks in the database and validate their operations."""
    blockchain = Blockchain(mode="irreversible")
    highest_block = mongo.Operations.find_one(
        {}, sort=[('block_num', -1)])['block_num']
    lowest_block = max(1, highest_block - 250_000)

    for block_num in range(highest_block, lowest_block, -1):
        if block_num % 100 == 0:
            log.info('Validating block #%s' % block_num)
        block = list(
            blockchain.stream(start_block=block_num, end_block=block_num))

        # remove all invalid or changed operations
        conditions = {
            'block_num': block_num,
            '_id': {'$nin': [x['_id'] for x in block]},
        }
        mongo.Operations.delete_many(conditions)

        # insert any missing operations
        for op in block:
            with suppress(DuplicateKeyError):
                transform = compose(strip_dot_from_keys, json_expand, typify)
                mongo.Operations.insert_one(transform(op))

        # re-process comments (does not re-add deleted posts)
        for comment in (x for x in block if x['type'] == 'comment'):
            upsert_comment(mongo,
                           '%s/%s' % (comment['author'], comment['permlink']))
class Story(Model, Node):
    _schema = story_schema

    def _on_init(self):
        try:
            self._validate()
        except Exception as e:
            print('ERROR in validation for Story:')
            print()
            print(str(e))
            print()
            print(self._yaml())
            print()

    mpd = property(lambda self: get_manifest(self))
    image = property(lambda self: get_image_url(self))
    video = property(lambda self: get_video_url(self))
    __repr__ = lambda self: f'Story(pk={self.pk})'
    location = property(lambda self: fallback(
        lambda: self['story_locations'][0]['location'],
        lambda: self['story_locations']['location'],
        lambda: None))
    # `=`, not `:` -- the original annotation syntax would never bind the property
    geotag = compose(property, silent)(
        lambda self: self['story_locations'][0]['location'])
    swipeup_url = property(
        silent(lambda self: self['story_cta'][0]['links'][0]['webUri']))
    spotyfy_song = property(
        lambda self: self['story_app_attribution']['content_url'])
def as_json(self):
    # two-argument all -- presumably funcy's all(pred, seq), which accepts a
    # predicate (the builtin all takes a single iterable); children with
    # integer names serialize as a JSON array
    is_array = compose(isa(int), attrgetter("name"))
    if all(is_array, self.children):
        return [x.as_json() for x in self.children]
    else:
        return {x.name: x.as_json() for x in self.children}
def compose_wrap(call, param):
    middleware = settings.get(param, [])
    key = tuple(middleware) + (call._func,)
    try:
        composed = compose_wrap.cache[key]
    except KeyError:
        composed = compose_wrap.cache[key] = compose(*middleware)(call._func)
    return composed(*call._args, **call._kwargs)
def get_vertical_cages(rows):
    transpose_coordinates = lambda t: (t[1], t[0])
    res = get_horizontal_cages(transpose(rows))
    res = walk_keys(transpose_coordinates, res)
    transpose_coordinates_lst = compose(list, partial(map, transpose_coordinates))
    res = walk_values(transpose_coordinates_lst, res)
    return res
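# For reference, a minimal self-contained demo (toy data, not from the solver
# above) of the funcy helpers used in get_vertical_cages: walk_keys maps a
# function over a mapping's keys, walk_values over its values, preserving the
# other side.
from funcy import walk_keys, walk_values

cages = {(0, 1): [(2, 3), (4, 5)]}
assert walk_keys(lambda t: (t[1], t[0]), cages) == {(1, 0): [(2, 3), (4, 5)]}
assert walk_values(lambda v: [(y, x) for x, y in v], cages) == {(0, 1): [(3, 2), (5, 4)]}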
def prepare_biobox_file(config):
    """
    Creates a biobox file in a temporary directory and returns a Docker
    volume string for that directory's location.
    """
    f = funcy.compose(
        vol.biobox_file,
        cfg.create_biobox_directory,
        cfg.generate_biobox_file_content,
        cfg.remap_biobox_input_paths)
    return f(config)
def add_bingfa():
    funcs = {'ME60': ME60.get_bingfa,
             'ME60-X16': ME60.get_bingfa,
             'M6000': M6k.get_bingfa}
    _get_bf = partial(_model, funcs)
    clear()
    nodes = graph.find('Bras')
    bras = [(x['ip'], x['model']) for x in nodes]
    lmap(compose(_add_bingfa, _get_bf), bras)
def process(self, text):
    fnlist = [
        self.remove_accents,
        self.lower,
        self.clear_html,
        self.transform2words,
        self.filtertokens,
        self.stem,
    ]
    # compose applies right to left, so reverse to run the pipeline in list order
    return compose(*reversed(fnlist))(text)
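# A minimal sketch of why `process` reverses fnlist: funcy.compose applies its
# arguments right to left, so reversing makes the pipeline run in the order
# the list reads (the lambdas here are stand-ins for the text-cleaning steps).
from funcy import compose

inc = lambda x: x + 1
double = lambda x: x * 2
pipeline = [inc, double]                       # intended order: inc, then double
assert compose(*reversed(pipeline))(3) == 8    # double(inc(3))
assert compose(*pipeline)(3) == 7              # inc(double(3)) -- wrong order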
def depobjs(self):
    return reversed(
        list(
            map(
                lambda pt: f'-l{pt.target.replace("lib", "", 1)}',
                filter(
                    funcy.compose(
                        functools.partial(operator.__eq__, 'object'),
                        funcy.rpartial(getattr, 'build')),
                    map(operator.itemgetter(1),
                        self.binding_dgraph.out_edges(self))))))
def add_bingfa():
    funcs = {
        'ME60': ME60.get_bingfa,
        'ME60-X16': ME60.get_bingfa,
        'M6000-S': M6k.get_bingfa,
        'M6000': M6k.get_bingfa
    }
    _get_bf = partial(_model, funcs)
    clear()
    nodes = graph.find('Bras')
    bras = [(x['ip'], x['model']) for x in nodes]
    lmap(compose(_add_bingfa, _get_bf), bras)
def setup_slug(sender, instance, **kwargs):
    # `self` and `name` are captured from the enclosing scope
    data = walk_values(compose(slugify, unicode), instance.__dict__)
    related = {item: data['_{0}_cache'.format(item)]
               for item in map(lambda u: u[0:-3],
                               filter(lambda u: u.endswith('_id'),
                                      data.keys()))}
    data = merge(data, related)
    slug = self.populate_from.format(**data)[:self.max_length]
    if slug != getattr(instance, name):
        setattr(instance, name, slug)
        instance.save()
def add_infs():
    funcs = {'zte': Zte.get_infs, 'hw': Huawei.get_infs}
    get_infs = partial(_company, funcs)
    clear_log()
    nodes = graph.cypher.execute(
        'match (n:Olt) return n.ip as ip,n.company as company')
    olts = [dict(ip=x['ip'], company=x['company']) for x in nodes]
    pool = Pool(128)
    lock = Manager().Lock()
    _add_infs_p = partial(_add_infs, lock)
    list(pool.map(compose(_add_infs_p, get_infs), olts))
    pool.close()
    pool.join()
def _map_gdl_to_publication(data_dict, obj):
    dataset = {
        "id": str(uuid.uuid3(uuid.NAMESPACE_DNS, str(data_dict['id']))),
        "type": "publications",
        "title": data_dict['title'],
        "creator": [a['name'] for a in data_dict['authors']],
        # "subject": data_dict,
        "notes": data_dict['description'],
        "publisher": data_dict.get('relatedOrganisation'),
        # "contributor": [a['name'] for a in data_dict['authors']],
        "date": data_dict.get('created'),
        "metadata_modified": data_dict.get('created'),
        # "publication_type": data_dict,
        # "format": data_dict,
        "identifier": data_dict['identifier'],
        "source": data_dict.get('source'),
        # "language": data_dict,
        # "relation": data_dict,
        # "spatial": data_dict,
        # "rights": data_dict,
        "license_id": 'notspecified',
        "member_countries": 'other',  # relatedCountry, optional
        "harvest_source": 'GDL'
    }
    thematic_area = data_dict.get('thematicArea', {}).get('area')
    if thematic_area:
        dataset["thematic_area_string"] = thematic_area_mapping.get(
            thematic_area)
    related_country = data_dict.get('relatedCountry')
    if related_country:
        schema = sh.scheming_get_dataset_schema('publications')
        choices = sh.scheming_field_by_name(schema['dataset_fields'],
                                            'member_countries')['choices']
        member_country = F.first(
            F.filter(
                F.compose(F.rpartial(contains, related_country),
                          itemgetter('label')), choices))
        if member_country:
            dataset['member_countries'] = member_country['value']
            spatial = get_extent_for_country(member_country['label'])
            if spatial:
                dataset['spatial'] = spatial['value']
    if data_dict['file']:
        res_url = _gl_url(obj.source.url, 'download') + '?id=' + str(
            data_dict['id'])
        res = {'name': data_dict['file'], 'url': res_url}
        res['format'] = splitext(res['name'])[1].lstrip('.')
        dataset['resources'] = [res]
    return dataset
def __init__(self, target: str, arch: str, destination=None, commands=[],
             build: str = 'application', c_files=[], c_flags=[], s_files=[],
             s_flags=[], includes=[], l_file=None, l_flags=[],
             dependencies=[], output: str = None, source_root=None,
             local_root=None):
    assert source_root is not None and local_root is not None
    bound_format = functools.partial(PykeTransform.__format, source_root,
                                     local_root)
    self.architectures = Architecture.generate_arch_list(
        *([arch] if not isinstance(arch, list) else arch))
    self.selected_arch: Architecture = None
    self.target = target
    self.destination = (f'initrd{destination}'
                        if destination is not None else 'initrd/')
    self.commands = commands
    self.c_files = list(map(bound_format, c_files))
    self.s_files = list(map(bound_format, s_files))
    self.l_file = bound_format(l_file) if l_file is not None else None
    self.c_flags = ' '.join(c_flags)
    self.s_flags = ' '.join(s_flags)
    self.l_flags = ' '.join([
        *l_flags,
        *(['-T build/generated/' + os.path.basename(self.l_file)]
          if l_file is not None else [])
    ])
    self.dependencies = dependencies
    self.output = output
    self.includes = ' '.join(
        map(
            funcy.compose(functools.partial(operator.__concat__, '-I'),
                          bound_format), includes))
    self.build = build
    self.binding_dgraph: networkx.DiGraph = None
    self.generated_make = False
    self.comp = False
    self.comp_objs = False
def run(self, args):
    import IPython
    import funcy
    from databot.shell import ShellHelper

    bot = self.bot  # noqa
    pipe = ShellHelper(self.bot)  # noqa
    take = funcy.compose(list, funcy.take)  # noqa

    IPython.embed(header='\n'.join([
        'Available objects and functions:',
        '  bot - databot.Bot instance',
        '  pipe - helper for accessing pipe instances, type `pipe.<TAB>` to access a pipe',
        '  take - takes n items from an iterable, example: take(10, pipe.mypipe.data.values())',
    ]))
def decorator(func):
    cached_func = _cached(*dargs, **dkwargs)(compose(force_render, func))

    @wraps(func)
    def wrapper(request, *args, **kwargs):
        assert isinstance(request, HttpRequest), \
            "A view should be passed with HttpRequest as first argument"
        if request.method not in ('GET', 'HEAD'):
            return func(request, *args, **kwargs)
        return cached_func(request, *args, **kwargs)

    if hasattr(cached_func, 'invalidate'):
        wrapper.invalidate = cached_func.invalidate
        wrapper.key = cached_func.key

    return wrapper
def decorator(func):
    dkwargs['key_func'] = view_cache_key
    cached_func = _cached(*dargs, **dkwargs)(compose(force_render, func))

    @wraps(func)
    def wrapper(request, *args, **kwargs):
        assert isinstance(request, HttpRequest), \
            "A view should be passed with HttpRequest as first argument"
        if request.method not in ('GET', 'HEAD'):
            return func(request, *args, **kwargs)
        return cached_func(request, *args, **kwargs)

    if hasattr(cached_func, 'invalidate'):
        wrapper.invalidate = cached_func.invalidate
        wrapper.key = cached_func.key

    return wrapper
def user_passes_test(test, message='Permission required', status=403):
    @decorator
    def deco(call):
        try:
            attempt_auth(call.request)
        except PermissionDenied as e:
            return json(status, detail=str(e))

        # Check test
        if test(call.request.user):
            return call()
        else:
            return json(status, detail=message)

    def csrf_exempt(func):
        func.csrf_exempt = True
        return func

    return compose(csrf_exempt, deco)
def amap(f, g):
    """Lift a scalar transformer f into an iterable

    :param f: transformer of type a -> b where a, b are scalars
    :type f: function
    :param g: mapper of type L x (returns an iterator)
    :type g: function
    :return: an iterable over domain of g with f mapped over range of g
    :rtype: function

    >>> def foo(x):
    ...     for i in range(x):
    ...         yield i
    >>> bar = amap(lambda x: x + 10, foo)
    >>> list(bar(5))
    [10, 11, 12, 13, 14]
    """
    return compose(partial(imap, f), g)
def add_power_info():
    funcs = {'S8508': S85.get_power_info,
             'S8505': S85.get_power_info,
             'T64G': T64.get_power_info,
             'S8905': S89.get_power_info,
             'S8905E': S8905E.get_power_info,
             'S9306': S93.get_power_info,
             'S9303': S93.get_power_info}
    get_power_info = partial(_model, funcs)
    # clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' "
        "return s.ip as ip,s.model as model")
    switches = [dict(ip=x['ip'], model=x['model']) for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_power_info, lock)
    list(pool.map(compose(_ff, get_power_info), switches))
    pool.close()
    pool.join()
def get_bingfa(ip):
    def _get_users(child, slot):
        record = do_some(child, 'disp max-online slot {s}'.format(s=slot))
        users = re_find(r'Max online users since startup\s+:\s+(\d+)', record)
        users = int(users or 0)
        date = re_find(r'Time of max online users\s+:\s+(\d{4}-\d{2}-\d{2})',
                       record)
        return (slot, users, date)

    try:
        child = telnet(ip)
        rslt = do_some(child, 'disp dev | in BSU')
        # funcy-style map accepts a regex: extract slot numbers per line,
        # then select(bool, ...) drops the non-matching lines
        ff = compose(partial(select, bool), partial(map, r'(\d+)\s+BSU'))
        slots = ff(rslt.split('\r\n'))
        maxUsers = lmap(partial(_get_users, child), slots)
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT):
        return ('fail', None, ip)
    return ('success', maxUsers, ip)
def decorate_method(self, impl, method):
    print('decorating {}::{}'.format(impl, method))
    try:
        undecorated_method = getattr(impl, method)
    except AttributeError:
        print('class {} does not implement method {}'.format(
            impl.__name__, method))
        raise
    setattr(
        impl, method,
        compose(
            self.api.doc(parser=impl.parser),
            marshal_with(impl.model)
        )(undecorated_method))
def add_traffics():
    funcs = {'S8508': S85.get_traffics,
             'S8505': S85.get_traffics,
             'T64G': T64.get_traffics,
             'S8905': S89.get_traffics,
             'S8905E': S8905E.get_traffics,
             'S9306': S93.get_traffics,
             'S9303': S93.get_traffics}
    get_traffics = partial(_model, funcs)
    # clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' "
        "return s.ip as ip,collect(i.name) as infs,s.model as model")
    switches = [dict(ip=x['ip'], infs=x['infs'], model=x['model'])
                for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_traffics, lock)
    list(pool.map(compose(_ff, get_traffics), switches))
    pool.close()
    pool.join()
def set_tfiles(opts):
    if not opts['--format']:
        opts['tfiles'] = opts.get('tfiles', supported_formats())
        return opts

    opts['tfiles'] = opts.get('tfiles', [])
    sformat = opts.get('--format', '')
    if ',' in sformat:
        lformat = sformat.split(',')
    elif ' ' in sformat:
        lformat = sformat.split()
    else:
        lformat = [sformat]
    lformat = fmap(str.strip, lformat)

    for ext in supported_formats():
        if ext in lformat:
            opts['tfiles'].append(ext)

    # dedupe while returning a plain list
    opts['tfiles'] = compose(list, set)(opts['tfiles'])
    return opts
def gen_stats(data, training, test, n_speakers, is_voice, which_speaker):
    pca = PCA(n_components=2)
    pca.fit(hstack(data).T)
    pca_data = map(compose(transpose, pca.transform, transpose), data)
    for (color, marker), sp in zip([("red", "o"), ("blue", "x"),
                                    ("green", "*")], pca_data):
        scatter(sp[0], sp[1], color=color, marker=marker)
    legend(["silence", "speaker1", "speaker2"], loc='upper left')
    title("PCA 2 Components of MFCC")
    savefig("pca_2_comp_mfcc.svg")

    X2, labels2 = dense_data_and_labels(zip(test, [0] + n_speakers * [1]))
    score1 = is_voice.score(X2, labels2)
    X3, labels3 = dense_data_and_labels(zip(test[1:], range(1, n_speakers + 1)))
    score2 = which_speaker.score(X3, labels3)
    savemat("data.mat", {"sp" + str(i): elem for i, elem in enumerate(data)})
    return score1, score2
def get_mbta_station_info(cfg):
    route_info = query_mbta_id("routes", cfg['route'])
    stop_info = query_mbta_id("stops", cfg['stop'])
    params = (('filter[stop]', cfg['stop']),
              ('filter[route]', cfg['route']),
              ('page[limit]', '10'))
    arrivals = query_mbta('predictions', params)
    by_direction = f.walk_values(
        vectorize(f.compose(relative_ts, op.itemgetter('arrival_time'))),
        f.group_by(op.itemgetter('direction_id'), arrivals))
    return [
        f.merge(
            {
                "station": stop_info['name'],
                "route": cfg['route'],
                "direction": route_info['direction_destinations'][k],
            },
            dict(zip(range(5), pad(v, 5))))
        for k, v in by_direction.items()
    ]
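# A small self-contained illustration (toy records, not real MBTA predictions)
# of the funcy pair used above: group_by buckets records by a key function,
# then walk_values transforms each bucket while keeping the keys.
import funcy as f
import operator as op

preds = [{'direction_id': 0, 'arrival_time': 't1'},
         {'direction_id': 1, 'arrival_time': 't2'},
         {'direction_id': 0, 'arrival_time': 't3'}]
grouped = f.walk_values(
    lambda v: [p['arrival_time'] for p in v],
    f.group_by(op.itemgetter('direction_id'), preds))
assert grouped == {0: ['t1', 't3'], 1: ['t2']}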
def log_spectrogram(x, win_size=WIN_SIZE, hop=HOP):
    to_freq = compose(np.transpose, np.matrix, np.fft.fft,
                      partial(mul, np.bartlett(win_size)))
    # integer division: win_size / 2 would be a float index under Python 3
    return log(np.abs(np.array(np.hstack(
        map(to_freq, windows(x, win_size, hop)))))[win_size // 2:, :])
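# log_spectrogram relies on a `windows` helper that is not shown here. A
# plausible sketch (an assumption, not the original implementation): yield
# fixed-size, hop-separated slices of a 1-D signal.
import numpy as np

def windows(x, win_size, hop):
    # slide a win_size-sample window across x in steps of hop samples
    for start in range(0, len(x) - win_size + 1, hop):
        yield x[start:start + win_size]

# e.g. list(windows(np.arange(10), 4, 2)) -> slices [0:4], [2:6], [4:8], [6:10]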
def power_check():
    clear_log()
    nodes = graph.find('Olt', property_key='company', property_value='hw')
    olts = [(x['ip'], x['company'], x['area']) for x in nodes]
    funcy.lmap(funcy.compose(output_info, get_power_info), olts)
import csv
import datetime
import operator
import re
from functools import partial
from time import mktime

from dateutil import parser
from funcy import compose
from funcy.py2 import map, zip

try:
    # importing scipy.stats.poisson first is necessary for some reason
    from scipy.stats import poisson
    import scipy
    DISTRIBUTION = scipy.stats.poisson
except ImportError:
    DISTRIBUTION = None

years = r'190\d|19[1-9]\d|200\d|201[0-5]'  # 1900-2015
year_regex = re.compile(years)
hamming = compose(sum, partial(map, operator.ne))
timestamp = lambda x: mktime(x.timetuple())
legend = {"queries": 'r', "references": 'b', "interval": 'g'}

#def pdist(s1, s2):
#    assert len(s1) == len(s2), "All sequences must be the same length! %s %s" % (s1, s2)
#    return hamming(s1, s2)/float(len(s1))

class InvalidFastaIdentifier(Exception):
    pass

def extract_date(fasta_id):
    '''Extract the date embedded in a fasta id.'''
    _e = InvalidFastaIdentifier("Could not retrieve date from {0}".format(fasta_id))
    if '____' not in fasta_id:
        raise _e
    s = fasta_id.split('____')[-1]
    try:
        dt = parser.parse(s.replace('_', '/'))
        return dt
    except ValueError:
        # parse failure: surface the domain-specific error
        raise _e
from __future__ import division

import json
import os.path as path

import funcy as fp
import numpy as np
import pandas as pd
from numpy.testing import assert_array_equal, assert_allclose
from pandas.util.testing import assert_frame_equal, assert_series_equal

from pyLDAvis import prepare

roundtrip = fp.compose(json.loads, lambda d: d.to_json(), prepare)

DATA_DIR = path.join(path.dirname(path.realpath(__file__)), "../data/")


def load_dataset(name):
    with open(path.join(DATA_DIR, '%s_input.json' % name), 'r') as j:
        data_input = json.load(j)
    with open(path.join(DATA_DIR, '%s_output.json' % name), 'r') as j:
        expected = json.load(j)
    return data_input, expected


def remove_col_suffixes(df):
    df.columns = [w.split('_')[0] for w in df.columns]
    return df
    :param Bio.SeqRecord rec: genbank record from SeqIO.parse format='genbank'
    :return iterable genes: iterator of gene objects (features with mat_peptide as their type)
    '''
    # Don't include `CDS`, that's the whole-genome polypeptide
    EXCLUDE_GENE_TYPES = ('source',)
    genes = filter(lambda x: x.type not in EXCLUDE_GENE_TYPES, rec.features)
    starts_ends_names = map(lambda f: (
        gene_name(f),
        int(f.location.start),
        int(f.location.end),
    ), genes)
    return starmap(Gene, starts_ends_names)


def fetch_record_by_id(_id):
    return Entrez.efetch(db="nucleotide", id=_id,
                         rettype='gb', retmode='text').read()


seq_parse_gb = partial(SeqIO.parse, format="genbank")
parse_fasta = partial(SeqIO.parse, format="fasta")

# assume the genbank file only has one record (so use `next`)
id_to_record = compose(next, seq_parse_gb, StringIO, fetch_record_by_id)
id_to_genes = compose(seqrecord_to_genes, id_to_record)
genbank_file_to_genes = compose(seqrecord_to_genes, next, seq_parse_gb)

DEGENS = ['S', 'R', 'D', 'W', 'V', 'Y', 'H', 'K', 'B', 'M']
degen_positions = lambda seq: (m.start()
                               for m in re.finditer('|'.join(DEGENS), seq))


def row_to_gene(row):
    '''
    :param str row: row from csv.reader containing int1, int2, str
        (start, stop, name). The header is ignored and fields can be in any
        order, but the first integer must be less than the second.
    :return Gene gene: Gene object
    '''
    row = map(str.strip, row)
    digits, _gene_name = split(str.isdigit, row)
    start, end = map(int, digits)
    assert start < end, ("Start field should be first and less than end field. "
                         "You supplied start %s end %s for gene %s"
                         % (start, end, _gene_name[0]))
def array_to_field(self, field):
    inside = field.extract_array_subtype(field.type)
    return compose(
        model_fields.List,
        self.get_typename_marshaler(inside)
    )(field)