def link_regions_to_fed(args):
    # try:
    db_session = DBSession()
    transaction.manager.begin()
    # 1. Get all federal districts
    fds = db_session.query(FederalDistrict).all()
    # 2. Get all regions
    regions = db_session.query(Region).all()
    # 3. Read csv file with federal districts (to get the original fed id)
    csv_fds = get_from_csv(path.join(BASE_PATH, 'federal_districts.csv'))
    # 4. Read updated csv file with regions and federal ids
    csv_regions = get_from_csv(path.join(BASE_PATH, 'regions.csv'))
    # 5. Update regions in DB
    for region in regions:
        # get fed_id from csv by region_code
        orig_fed_id = next(ifilter(lambda x: x['region_code'] == str(region.region_code),
                                   csv_regions))['fed_id']
        # get original federal district from csv
        orig_fed = next(ifilter(lambda x: x['id'] == orig_fed_id, csv_fds))
        # find federal district in db by short_name
        db_fed = next(ifilter(lambda fed: fed.short_name == unicode(orig_fed['short_name'], 'utf8'), fds))
        # update db region
        region.federal_dist = db_fed
    transaction.manager.commit()
    db_session.close()
    print('Regions were linked with federal districts')
def edges(self, src_node=None, dst_node=None):
    _edges = self._edges
    if src_node:
        _edges = ifilter(lambda x: x.source == src_node.id, _edges)
    if dst_node:
        _edges = ifilter(lambda x: x.target == dst_node.id, _edges)
    return _edges
def _guess_cover(self, files):
    """Return the filename within <files> that is the most likely to be the
    cover of an archive using some simple heuristics.
    """
    # Ignore MacOSX meta files.
    files = itertools.ifilter(lambda filename: u'__MACOSX' not in
                              os.path.normpath(filename).split(os.sep), files)
    # Ignore credit files if possible.
    files = itertools.ifilter(lambda filename: u'credit' not in
                              os.path.split(filename)[1].lower(), files)
    images = list(itertools.ifilter(image_tools.is_image_file, files))
    tools.alphanumeric_sort(images)
    front_re = re.compile('(cover|front)', re.I)
    candidates = filter(front_re.search, images)
    candidates = [c for c in candidates if 'back' not in c.lower()]
    if candidates:
        return candidates[0]
    if images:
        return images[0]
    return None
def get_containers(template):
    # Build a tree of the templates we're using, placing the root template first.
    levels = build_extension_tree(template.nodelist)

    contentlet_specs = []
    contentreference_specs = SortedDict()
    blocks = {}

    for level in reversed(levels):
        level.initialize()
        contentlet_specs.extend(itertools.ifilter(lambda x: x not in contentlet_specs, level.contentlet_specs))
        contentreference_specs.update(level.contentreference_specs)
        for name, block in level.blocks.items():
            if block.block_super:
                blocks.setdefault(name, []).append(block)
            else:
                blocks[name] = [block]

    for block_list in blocks.values():
        for block in block_list:
            block.initialize()
            contentlet_specs.extend(itertools.ifilter(lambda x: x not in contentlet_specs, block.contentlet_specs))
            contentreference_specs.update(block.contentreference_specs)

    return contentlet_specs, contentreference_specs
def ignore(context):
    '''.ignore nick!user@host'''
    bot.config.setdefault('IGNORE', [])
    if context.args:
        to_ignore = glob(context.args)
        supersets = list(ifilter(lambda ignored: to_ignore.issub(glob(ignored)), bot.config['IGNORE']))
        if len(supersets) > 0:
            return 'Not ignoring \x02%s\x02 because it is already matched by \x02%s\x02' % (context.args, supersets[0])
        filter = lambda ignored: to_ignore.issuper(glob(ignored))
        removed = list(ifilter(filter, bot.config['IGNORE']))
        bot.config['IGNORE'] = list(ifilterfalse(filter, bot.config['IGNORE']))
        bot.config['IGNORE'].append(context.args)
        save_ignores()
        bot.log(context, ('IGNORE'), '+{0}{1}'.format(context.args, (' -' + ' -'.join(removed)) if removed else ''))
        if removed:
            return 'Ignored and removed \x02%d\x02 redundant ignores: \x02%s\x02' % (len(removed), '\x02, \x02'.join(removed))
        else:
            return 'Ignored.'
    else:
        return eval.__doc__
def master_up(key_name, credential_file="~/.rackspace_cloud_credentials"):
    '''
    Create a salt-master on Rackspace

    Alternatively create the master using nova
    '''
    # Authenticate with Rackspace, use credential file
    pyrax.set_setting("identity_type", "rackspace")
    pyrax.set_credential_file(os.path.expanduser(credential_file))

    # Shorthand
    cs = pyrax.cloudservers

    # Building Ubuntu 12.04 boxes with 512 MB RAM
    iter_flavors = ifilter(lambda flavor: flavor.ram == 512, cs.flavors.list())
    flavor_512 = iter_flavors.next()
    iter_os = ifilter(lambda img: "Ubuntu 12.04" in img.name, cs.images.list())
    ubu_image = iter_os.next()

    master = cs.servers.create("master.ipython.org", ubu_image.id, flavor_512, key_name=key_name)
    master = pyrax.utils.wait_for_build(master, verbose=True)

    env.hosts = [master.accessIPv4]
    print("Master IP: {}".format(master.accessIPv4))
    return master.accessIPv4
def _resolveSpecialSegment(self, segmentB, specialResolutionMethods):
    resolutionMethodExecutor = _compileRules(specialResolutionMethods, 3)
    for (resolutionMethod, args) in resolutionMethodExecutor[True]:
        iterables = []
        for arg in args:
            iterables.append(itertools.repeat(arg))
        resolutions = itertools.imap(resolutionMethod, self.allCorrectSinglePossibilities(), *iterables)
        correctAB = itertools.izip(self.allCorrectSinglePossibilities(), resolutions)
        correctAB = itertools.ifilter(
            lambda possibAB: possibility.pitchesWithinLimit(possibA=possibAB[1], maxPitch=segmentB._maxPitch),
            correctAB,
        )
        if self.fbRules.applyConsecutivePossibRulesToResolution:
            correctAB = itertools.ifilter(
                lambda possibAB: self._isCorrectConsecutivePossibility(possibA=possibAB[0], possibB=possibAB[1]),
                correctAB,
            )
        if self.fbRules.applySinglePossibRulesToResolution:
            segmentB._singlePossibilityRuleChecking = _compileRules(
                segmentB.singlePossibilityRules(segmentB.fbRules)
            )
            correctAB = itertools.ifilter(
                lambda possibAB: segmentB._isCorrectSinglePossibility(possibA=possibAB[1]),
                correctAB,
            )
        return correctAB

    raise SegmentException("No standard resolution available.")
def ts_guess_manifest_v1(ts_path):
    """Guesses the values of manifest fields in a timestream
    """
    # This whole thing's one massive f*****g kludge. But it seems to work
    # pretty good so, well, whoop.
    retval = {}
    # get a sorted list of all files
    all_files = []
    for root, folders, files in os.walk(ts_path):
        for folder in folders:
            if folder.startswith("_"):
                folders.remove(folder)
        for fle in files:
            all_files.append(path.join(root, fle))
    all_files = sorted(all_files)
    # find most common extension, and assume this is the ext
    exts = collections.Counter(IMAGE_EXT_CONSTANTS)
    our_exts = map(lambda x: path.splitext(x)[1][1:], all_files)
    our_exts = ifilter(lambda x: x.lower() in IMAGE_EXT_CONSTANTS, our_exts)
    for ext in our_exts:
        try:
            exts[ext] += 1
        except KeyError:
            pass
    # most common gives list of tuples. [0] = (ext, count), [0][0] = ext
    retval["extension"] = exts.most_common(1)[0][0]
    all_files = ifilter(
        lambda x: path.splitext(x)[1][1:] == retval["extension"], all_files)
    # get image type from extension:
    try:
        retval["image_type"] = IMAGE_EXT_TO_TYPE[retval["extension"]]
    except KeyError:
        retval["image_type"] = None
    # Get list of images:
    images = ifilter(
        lambda x: path.splitext(x)[1][1:] == retval["extension"], all_files)
    # decode times from images:
    times = map(ts_parse_date_path, sorted(images))
    # get first and last dates:
    try:
        retval["start_datetime"] = ts_format_date(times[0])
        retval["end_datetime"] = ts_format_date(times[-1])
    except IndexError:
        msg = "{} is an invalid V1 timestream".format(ts_path)
        LOG.error(msg)
        raise ValueError(msg)
    # Get time intervals between images
    intervals = list()
    for iii in range(len(times) - 1):
        interval = times[iii + 1] - times[iii]
        intervals.append(interval.seconds)
    retval["interval"] = max(min(intervals), 1)
    retval["name"] = path.basename(ts_path.rstrip(os.sep))
    # This is dodgy isn't it :S
    retval["missing"] = []
    # If any of this worked, it must be version 1
    retval["version"] = 1
    return retval
def _analyseRepository(self, repositoryName, dateSince):
    reportfile = os.path.join(self.eventpath, repositoryName + ".events")
    records, seconds = 0, 0.0
    if not isfile(reportfile):
        return records, seconds
    events = open(reportfile)
    try:
        split = lambda l: map(str.strip, l.split("\t"))
        begintime = None
        datefilter = lambda (date, x, y, z): date[1:-1] >= dateSince
        allevents = imap(split, ifilter(str.strip, events))
        for date, event, anIdentifier, comments in ifilter(datefilter, allevents):
            if event == "STARTHARVEST":
                begintime = parseToTime(date[1:-1])
                harvested = uploaded = deleted = total = -1
            elif event == "ENDHARVEST":
                if begintime and harvested > -1:
                    endtime = parseToTime(date[1:-1])
                    if endtime > begintime:
                        records += int(uploaded) + int(deleted)
                        seconds += diffTime(endtime, begintime)
                begintime = None
            elif event == "SUCCES":
                match = NUMBERS_RE.match(comments)
                if match:
                    harvested, uploaded, deleted, total = match.groups()
    finally:
        events.close()
    return records, seconds
def PosSecStruc(Pairs, PairNum):
    # discards structures in which one base pairs more than one time
    def DuplicatesCheck(p):
        subcharL = []
        for char in p:
            for subchar in char:
                subcharL.append(subchar)
        for subchar in subcharL:
            if subcharL.count(subchar) > 1:
                return False
        return True

    # discards structures in which the base pairs would overlap
    def OverlapCheck(ps):
        struc = sorted(ps)
        for i in xrange(len(struc)):
            for j in xrange(i + 1, len(struc)):
                if struc[i][0] < struc[j][0] < struc[i][1] < struc[j][1]:
                    return False
                if struc[j][0] < struc[i][0] < struc[j][1] < struc[i][1]:
                    return False
        return True

    # this is very efficient code!!!!!! <--- REUSE
    strucs = list(ifilter(lambda x: DuplicatesCheck(x), list(combinations(Pairs, PairNum))))
    out = list(ifilter(lambda x: OverlapCheck(x), strucs))
    return out
def iteritems(self):
    definitions = type(self).configuration_setting_definitions
    version = self.command.protocol_version
    return ifilter(
        lambda (name, value): value is not None,
        imap(
            lambda setting: (setting.name, setting.__get__(self)),
            ifilter(
                lambda setting: setting.is_supported_by_protocol(version),
                definitions)))
def everything(cls, *tags, **location):
    """Read all of the specified `tags` within the database using the cache.

    Returns a tuple of the format `(Globals, Contents, Frames)`. Each field
    is a dictionary keyed by location or offset that retains the tags that
    were read. If the boolean `location` was specified then key each
    contents tag by location instead of address.
    """
    global export

    # collect all the globals into a dictionary
    six.print_(u'--> Grabbing globals (cached)...', file=output)
    iterable = export.globals(*tags)
    Globals = {ea: res for ea, res in itertools.ifilter(None, iterable)}

    # grab all the contents into a dictionary
    six.print_(u'--> Grabbing contents from functions (cached)...', file=output)
    location = location.get('location', False)
    iterable = export.contents(*tags, location=location)
    Contents = {loc: res for loc, res in itertools.ifilter(None, iterable)}

    # grab any frames into a dictionary
    six.print_(u'--> Grabbing frames from functions (cached)...', file=output)
    iterable = export.frames(*tags)
    Frames = {ea: res for ea, res in itertools.ifilter(None, iterable)}

    # return it back to the user
    return Globals, Contents, Frames
def ignore(context):
    """.ignore nick!user@host"""
    bot.config.setdefault("IGNORE", [])
    if not utils.isadmin(context.line["prefix"], bot):
        return
    if context.args:
        to_ignore = glob(context.args)
        supersets = list(ifilter(lambda ignored: to_ignore.issub(glob(ignored)), bot.config["IGNORE"]))
        if len(supersets) > 0:
            return "Not ignoring \x02%s\x02 because it is already matched by \x02%s\x02" % (context.args, supersets[0])
        filter = lambda ignored: to_ignore.issuper(glob(ignored))
        removed = list(ifilter(filter, bot.config["IGNORE"]))
        bot.config["IGNORE"] = list(ifilterfalse(filter, bot.config["IGNORE"]))
        bot.config["IGNORE"].append(context.args)
        save_ignores()
        if len(removed) > 0:
            return "Ignored and removed \x02%d\x02 redundant ignores: \x02%s\x02" % (
                len(removed),
                "\x02, \x02".join(removed),
            )
        else:
            return "Ignored."
    else:
        return eval.__doc__
def containers(self):
    """
    Returns a tuple where the first item is a list of names of contentlets
    referenced by containers, and the second item is a list of tuples of
    names and contenttypes of contentreferences referenced by containers.
    This will break if there is a recursive extends or includes in the
    template code. Due to the use of an empty Context, any extends or
    include tags with dynamic arguments probably won't work.
    """
    template = DjangoTemplate(self.code)

    # Build a tree of the templates we're using, placing the root template first.
    levels = build_extension_tree(template.nodelist)

    contentlet_specs = []
    contentreference_specs = SortedDict()
    blocks = {}

    for level in reversed(levels):
        level.initialize()
        contentlet_specs.extend(itertools.ifilter(lambda x: x not in contentlet_specs, level.contentlet_specs))
        contentreference_specs.update(level.contentreference_specs)
        for name, block in level.blocks.items():
            if block.block_super:
                blocks.setdefault(name, []).append(block)
            else:
                blocks[name] = [block]

    for block_list in blocks.values():
        for block in block_list:
            block.initialize()
            contentlet_specs.extend(itertools.ifilter(lambda x: x not in contentlet_specs, block.contentlet_specs))
            contentreference_specs.update(block.contentreference_specs)

    return contentlet_specs, contentreference_specs
def get_min_expected_inventory_in_interval(
        self, product_id, start_time, end_time, inventory_status_id=None,
        scheduled_adjustment_exclude_id=None):
    starting_inventory = self.get_net_inventory(product_id, inventory_status_id)

    sa_qs = self.scheduled_adjustments.by_product(product_id).before(end_time).pending_only()
    if scheduled_adjustment_exclude_id:
        # exclude() returns a new queryset, so reassign it
        sa_qs = sa_qs.exclude(id=scheduled_adjustment_exclude_id)
    scheduled_adjustments = list(sa_qs)

    scheduled_adjustments_before_interval = ifilter(lambda sa: sa.expected_time < start_time, scheduled_adjustments)
    scheduled_adjustments_during_interval = ifilter(
        lambda sa: sa.expected_time >= start_time, scheduled_adjustments
    )

    net_sa_before_interval = sum(sa.qty for sa in scheduled_adjustments_before_interval)
    current_inventory = starting_inventory + net_sa_before_interval
    min_inventory = current_inventory

    for sa in scheduled_adjustments_during_interval:
        current_inventory += sa.qty
        min_inventory = min(current_inventory, min_inventory)

    return min_inventory
def merge_schemas(to_schema, from_schema, inplace=False):
    """Recursively merge from_schema into to_schema

    Takes care of leaving to_schema intact if inplace is False (default).
    Returns a new Schema instance if inplace is False or to_schema is a
    Schema class not an instance or the changed to_schema.
    """
    # Nested schemas may be Schema (sub-)classes instead of instances.
    # Copying Schema classes does not work, so we just create an instance.
    if isclass(to_schema) and issubclass(to_schema, validators.Schema):
        to_schema = to_schema()
    elif not inplace:
        to_schema = copy_schema(to_schema)

    # Recursively merge child schemas
    is_schema = lambda f: isinstance(f[1], validators.Schema)
    seen = set()
    for k, v in ifilter(is_schema, to_schema.fields.iteritems()):
        seen.add(k)
        from_field = from_schema.fields.get(k)
        if from_field:
            v = merge_schemas(v, from_field)
            to_schema.add_field(k, v)

    # Add remaining fields if we can
    can_add = lambda f: f[0] not in seen and can_add_field(to_schema, f[0])
    for field in ifilter(can_add, from_schema.fields.iteritems()):
        to_schema.add_field(*field)

    return to_schema
def iter_nodes_local_first(self, ring, partition):
    """
    Yields nodes for a ring partition.

    If the 'write_affinity' setting is non-empty, then this will yield N
    local nodes (as defined by the write_affinity setting) first, then the
    rest of the nodes as normal. It is a re-ordering of the nodes such
    that the local ones come first; no node is omitted. The effect is that
    the request will be serviced by local object servers first, but
    nonlocal ones will be employed if not enough local ones are available.

    :param ring: ring to get nodes from
    :param partition: ring partition to yield nodes for
    """
    primary_nodes = ring.get_part_nodes(partition)
    num_locals = self.app.write_affinity_node_count(len(primary_nodes))

    is_local = self.app.write_affinity_is_local_fn
    if is_local is None:
        return self.app.iter_nodes(ring, partition)

    all_nodes = itertools.chain(primary_nodes, ring.get_more_nodes(partition))
    first_n_local_nodes = list(itertools.islice(itertools.ifilter(is_local, all_nodes), num_locals))

    # refresh it; it moved when we computed first_n_local_nodes
    all_nodes = itertools.chain(primary_nodes, ring.get_more_nodes(partition))
    local_first_node_iter = itertools.chain(
        first_n_local_nodes,
        itertools.ifilter(lambda node: node not in first_n_local_nodes, all_nodes)
    )

    return self.app.iter_nodes(ring, partition, node_iter=local_first_node_iter)
def l_of_d_intersection(ld0, ld1, keys):
    """ Find intersection between a list of dicts and a list of dicts/objects

    :param ld0 :type [DictType] or :type [AnyObject]
    :param ld1 :type [DictType]
    :param keys :type ListType

    :returns intersection of ld0 and ld1 where key is equal.
             At best, will return `ld0` in full. At worst: [].
    """
    list0, list1 = ld0, ld1
    if len(list0) and type(list0[0]) is not DictType:
        _class = type(list0[0])
        list0 = map(obj_to_d, list0)
    if len(list1) and type(list1[0]) is not DictType:
        _class = type(list1[0])
        list1 = map(obj_to_d, list1)

    processed_ld0 = frozenset(
        ifilter(None, imap(lambda (idx, obj): normalise(idx, obj, keys, id(obj)), enumerate(list0))))
    processed_ld1 = frozenset(
        ifilter(None, imap(lambda (idx, obj): normalise(idx, obj, keys, id(obj)), enumerate(list1))))

    return (ld0[res.idx]
            for result in processed_ld0.intersection(processed_ld1)
            for res in result.values())
def articles_from_issue(path, issue_filename):
    issue = graph.resource(URIRef(u'http://miskinhill.com.au/' + path))
    for article in ifilter(lambda r: has_type(r, MHS.Article), issue.subjects(DC.isPartOf)):
        assert article.identifier.startswith(issue.identifier), article.identifier
        generate_pdf(os.path.join(path, issue_filename),
                     os.path.join(path, article.identifier.rsplit('/', 1)[1] + '.pdf'),
                     article.value(MHS.startPage).toPython() + issue.value(MHS.frontMatterExtent).toPython(),
                     article.value(MHS.endPage).toPython() + issue.value(MHS.frontMatterExtent).toPython(),
                     entities(striptags(article.value(DC.title).toPython())),
                     entities('; '.join(c.value(FOAF.name).toPython() for c in article.objects(DC.creator))))
    for review in ifilter(lambda r: has_type(r, MHS.Review), issue.subjects(DC.isPartOf)):
        assert review.identifier.startswith(issue.identifier + 'reviews/'), review.identifier
        generate_pdf(os.path.join(path, issue_filename),
                     os.path.join(path, 'reviews', review.identifier.rsplit('/', 1)[1] + '.pdf'),
                     review.value(MHS.startPage).toPython() + issue.value(MHS.frontMatterExtent).toPython(),
                     review.value(MHS.endPage).toPython() + issue.value(MHS.frontMatterExtent).toPython(),
                     entities('Review of ' + ' and '.join(striptags(b.value(DC.title).toPython())
                                                          for b in review.objects(MHS.reviews))),
                     entities('; '.join(c.value(FOAF.name).toPython() for c in review.objects(DC.creator))))
    for obituary in ifilter(lambda r: has_type(r, MHS.Obituary), issue.subjects(DC.isPartOf)):
        assert obituary.identifier.startswith(issue.identifier), obituary.identifier
        if obituary.value(DC.title):
            title = striptags(obituary.value(DC.title).toPython())
        else:
            title = 'In memoriam %s' % obituary.value(MHS.obituaryOf).value(FOAF.name).toPython()
        generate_pdf(os.path.join(path, issue_filename),
                     os.path.join(path, obituary.identifier.rsplit('/', 1)[1] + '.pdf'),
                     obituary.value(MHS.startPageInFrontMatter).toPython(),
                     obituary.value(MHS.endPageInFrontMatter).toPython(),
                     entities(title),
                     entities('; '.join(c.value(FOAF.name).toPython() for c in obituary.objects(DC.creator))))
def main():
    """
    Create or append a logfile with name yyyy-mm.log as an excerpt of mediatum.log.
    Only lines beginning with the period, containing the string 'INFO' and
    containing one of the strings 'GET', 'POST' or 'HEAD' are excerpted.

    usage:
    find /home/congkhacdung/logrotated/ -type f -iname 'mediatum.*.log' | sort | xargs cat | python bin/stats.py --skip-ip 127.0.0.1 --skip-ip 129.187.87.37 2018 2
    """
    parser = argparse.ArgumentParser(description='Extract info needed for statistics.')
    parser.add_argument('--skip-ip', dest='skip_ip', action='append', default=[], help='ip to skip')
    parser.add_argument('year', type=int, help='year')
    parser.add_argument('month', type=int, help='month')
    args = parser.parse_args()

    period = "{:4}-{:0>2}".format(args.year, args.month)
    skip_ip = args.skip_ip
    outdir = os.path.join(config.get("logging.save", config.get("logging.path", "/tmp")))

    match = re.compile('^({period}.{{17}}).*(INFO).{{2}}(.*(?:GET|POST|HEAD).*)'.format(period=period)).match
    lines = sys.stdin
    lines = imap(match, lines)
    lines = ifilter(None, lines)
    lines = imap(operator.methodcaller('groups'), lines)

    skip_ip_pattern = map("([^0-9.]{}[^0-9.])".format, skip_ip)
    skip_ip_pattern = '|'.join(skip_ip_pattern)
    match = re.compile(skip_ip_pattern).match
    lines = ifilter(lambda g: not match(g[2]), lines)
    lines = imap(operator.concat, lines, repeat(("\n",)))
    lines = imap("".join, lines)

    with tempfile.NamedTemporaryFile(dir=outdir) as tmpfile:
        tmpfile.writelines(lines)
        tmpfile.flush()
        init.full_init()
        buildStatAll([], period, tmpfile.name)
def __init__(self, cfdUnitID, unitMgrID=0, cmdrRating=0, peripheryID=0, unit=None, **kwargs):
    super(UnitsListItem, self).__init__()
    creatorFullName = ''
    vehiclesNames = tuple()
    playersCount = 0
    commandSize = 0
    state = 0
    isRosterSet = False
    if unit:
        creatorDBID, creator = next(itertools.ifilter(lambda (dbID, p): p['role'] & UNIT_ROLE.COMMANDER_UPDATES > 0, unit._players.iteritems()), (None, None))
        if creator is not None:
            creatorFullName = g_lobbyContext.getPlayerFullName(creator['nickName'], clanAbbrev=creator.get('clanAbbrev'), pDBID=creatorDBID)
        freeSlots = unit.getFreeSlots()
        playersSlots = unit.getPlayerSlots()
        state = unit.getState()
        vehicles = g_itemsCache.items.getVehicles(REQ_CRITERIA.INVENTORY)
        matches = unit.getRoster().matchVehicleListToSlotList(vehicles.keys(), freeSlots)
        vehiclesNames = tuple(itertools.imap(lambda x: vehicles[x].shortUserName, set(matches.keys())))
        playersCount = len(playersSlots)
        commandSize = len(playersSlots) + len(freeSlots)
        isRosterSet = unit.isRosterSet(ignored=CREATOR_ROSTER_SLOT_INDEXES)
    self.cfdUnitID = cfdUnitID
    self.unitMgrID = unitMgrID
    self.creator = creatorFullName
    self.rating = cmdrRating
    self.peripheryID = peripheryID
    self.playersCount = playersCount
    self.commandSize = commandSize
    self.vehicles = vehiclesNames
    self.state = UnitState(state)
    self.isRosterSet = isRosterSet
    return
def add(self, *urls):
    if not urls:
        return

    def readfile(url):
        with closing(urlopen(url) if url.startswith(('http://', 'https://')) else open(url, 'r')) as f:
            return f.read().splitlines()

    clean_playlist = lambda lines: ifilter(None, imap(str.strip, imap(str, lines)))
    parse_null = lambda url: (url,)
    parsers = {
        '.m3u': lambda url: ifilter(lambda line: not line.startswith('#'), readfile(url)),
        '.pls': lambda url: imap(lambda line: line.split('=')[1],
                                 ifilter(lambda line: line.startswith('File'), readfile(url))),
    }

    urls = flatten(imap(lambda url: clean_playlist(parsers.get(os.path.splitext(url)[1], parse_null)(url)), urls))
    cmds = imap(lambda url: ('addid', url), urls)
    try:
        self._mpd.bulk_do(cmds)
    except CommandError:
        self.context.notify('error', _('Songs not found'))
    else:
        self.refresh()
def compute_possibles(letters, slots, dictionary_words, context):
    """
    computes possible solution words from given dictionary and
    optionally available slots

    words - iterator of all words in the dictionary
    letters - sorted list of letters on board
    slots - number of available slots

    returns - iterator of possible solution words
    """
    words = dictionary_words

    # if we have a known number of slots, filter
    # our word list down to words w/ that many letters
    if slots:
        words = ifilter(f.word_len(slots), words)

    # filter our word list down to words whose
    # letters are a subset of the given letters
    words = ifilter(f.letter_subset(letters), words)

    # we now have our final iterator of possible solutions
    return words
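A usage sketch for the filtering stages above. The f.word_len and f.letter_subset predicate factories are not shown in the snippet, so the stand-ins below are hypothetical versions with the behaviour the docstring implies; the sketch mirrors the two ifilter stages rather than importing the surrounding module.

from itertools import ifilter

def word_len(slots):
    # predicate: word uses exactly `slots` letters
    return lambda word: len(word) == slots

def letter_subset(letters):
    # predicate: every letter of the word is available on the board
    return lambda word: all(word.count(c) <= letters.count(c) for c in word)

dictionary_words = iter(['cat', 'act', 'tack', 'cart', 'at'])
letters = sorted('tac')

words = ifilter(word_len(3), dictionary_words)
words = ifilter(letter_subset(letters), words)
print list(words)  # ['cat', 'act']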
def territory_children_codes(territory_code, include_self=False):
    """ Return a set of subdivision codes from all sub-levels.

    All returned codes are normalized, including self.
    """
    codes = set()

    code = normalize_territory_code(territory_code)

    # We have a country code, look for matching subdivisions in one pass.
    if code in supported_country_codes():
        codes.update(imap(
            attrgetter('code'),
            ifilter(lambda subdiv: subdiv.country_code == code, subdivisions)))

    # Engage the stupid per-level recursive brute-force search as pycountry
    # only expose the child-parent relationship upwards.
    else:
        direct_children_codes = set(imap(
            attrgetter('code'),
            ifilter(lambda subdiv: subdiv.parent_code == code, subdivisions)))
        for child_code in direct_children_codes:
            codes.update(
                territory_children_codes(child_code, include_self=True))

    if include_self:
        codes.add(code)

    return codes
def processImages(self, folder, params):
    """
    Process the images of a directory. This includes computing descriptors
    as well as training ITQ and computing hash codes.

    :param folder: A folder to process images on.
    """
    def oldestFileId(item):
        """
        Find the oldest file in an item, and return its id.

        :param item: An item document, or minimally a dictionary with the item id.
        :returns: The id of the oldest file, or False if the item has no files.
        """
        files = ModelImporter.model('item').childFiles(item, limit=1,
                                                       sort=[('created', SortDir.ASCENDING)])
        try:
            return (str(item['_id']), files[0]['_id'])
        except Exception:
            return (False, False)

    # TODO Filter items by supported mime types for SMQTK
    items = itertools.ifilter(lambda item: 'smqtk_uuid' not in item.get('meta', {}),
                              ModelImporter.model('folder').childItems(folder))

    self._processImages(folder, itertools.ifilter(None, itertools.imap(oldestFileId, items)))
def truncatable_primes():
    one_digit_primes = set(['2', '3', '5', '7'])
    forbidden_numbers = set(['4', '6', '8'])

    def is_candidate(number):
        num_str = str(number)
        if len(num_str) < 2:
            return False
        if not set([num_str[0], num_str[-1]]) < one_digit_primes:
            return False
        digits = set([digit for digit in num_str])
        if digits & forbidden_numbers:
            return False
        if '2' in digits and num_str[0] != '2':
            return False
        if '5' in digits and num_str[0] != '5':
            return False
        return True

    trunc_primes = []
    for prime in ifilter(is_truncatable, ifilter(is_candidate, primes)):
        trunc_primes.append(prime)
        if len(trunc_primes) == 11:
            break
    return trunc_primes
def merge_reference_graph(reference_graph):
    # create a copy
    merged_graph = nx.DiGraph(reference_graph)
    # singletons ?
    out_d = merged_graph.out_degree()
    in_d = merged_graph.in_degree()
    singletons = [x for x, deg in in_d.items() if (deg == 1) and out_d[x] == 1]
    # bunch them
    induced = merged_graph.subgraph(singletons).to_undirected()
    buckets = itertools.ifilter(lambda x: len(x) > 1,
                                nx.networkx.connected_components(induced.to_undirected()))
    is_entry_point = lambda x: reference_graph.pred[x].items()[0][0] not in singletons
    is_exit_point = lambda x: reference_graph.succ[x].items()[0][0] not in singletons
    n_buckets = 0
    # Compact them in the merged graph
    for bunch in buckets:
        # we know there's exactly 1 predecessor and thus 1 entry_point whose pred is in the graph
        entry_point = reference_graph.predecessors_iter(
            itertools.ifilter(is_entry_point, bunch).next()).next()
        # we know there's exactly 1 successor and thus 1 exit point whose succ exists in the graph
        exit_point = reference_graph.successors_iter(
            itertools.ifilter(is_exit_point, bunch).next()).next()
        # print "Compacting chain (unordered): %s" % " / ".join(bunch)
        meta_node_lbl = "_".join(bunch)
        # color the meta node with the 'ref_list' attribute from the nodes it replaces
        ref_list_meta_node = []
        fonction_ref = lambda x: merged_graph.node[x]['ref_list'].keys()
        for node in bunch:
            ref_list_meta_node += fonction_ref(node)
        # give the meta node a weight
        merged_graph.add_node(meta_node_lbl, length=len(bunch), ref_list=set(ref_list_meta_node))
        # remove the compacted nodes
        merged_graph.remove_nodes_from(bunch)
        # add an edge from the node preceding the meta node to the meta node
        merged_graph.add_edge(entry_point, meta_node_lbl)
        # add an edge from the meta node to the node following it
        merged_graph.add_edge(meta_node_lbl, exit_point)
        n_buckets += 1
    logger.info("Found and compacted %d linear chains", n_buckets)
    logger.info("Reducing graph size from %d to %d", len(reference_graph), len(merged_graph))
    return merged_graph
def getSubDevicesGen(self, devfilter=None):
    """ get all the devices under an instance of a DeviceGroup """
    devices = ifilter(lambda dev: self.checkRemotePerm(ZEN_VIEW, dev),
                      self._getSubdevices())
    if devfilter:
        devices = ifilter(devfilter, devices)
    for device in devices:
        yield device
def build_graph_from_feature_tuples(X, tol, input_graph=AG):
    G = nx.Graph()
    # the list is ordered by layer and input_id
    nb_layers = X[-1][0][0] + 1
    input_node_count = input_graph.number_of_nodes()

    # Create edges, and create nodes if needed
    for i in xrange(nb_layers - 1):
        # for each current node
        for c in itertools.ifilter(lambda x: x[0][0] == i and (x[1] > tol or x[1] < -tol), X):
            input_id = c[0][1]
            src = input_id + (i * input_node_count)
            for n in itertools.ifilter(lambda x: x[0][0] == i + 1 and (x[1] > tol or x[1] < -tol), X):
                tgt_in_id = n[0][1]
                tgt = tgt_in_id + ((i + 1) * input_node_count)
                # Check that the input_ids are really adjacent in the global
                # adjacency matrix. Beware, ids in the adj matrix start at 0
                if input_graph.has_edge(input_id - 1, tgt_in_id - 1) or input_id == tgt_in_id:
                    if src not in G:
                        data = create_node_data(i, input_id, c[1])
                        G.add_node(src, data)
                    if tgt not in G:
                        data = create_node_data(i + 1, tgt_in_id, n[1])
                        G.add_node(tgt, data)
                    G.add_edge(src, tgt)
    return G
def compute_upto(self, n):
    if n > self.__upto:
        new_possibles = (i for i in xrange(self.__upto + 1, n + 1) if i % 2 != 0)

        # first, loop over all of our existing primes, and remove any definite
        # non-primes from the possibles
        for cmp_val in ifilter(lambda x: x * x <= n, self.primeset):
            # see comment below about partial() and cmp_val
            new_possibles = ifilter(partial(not_divisible_by, cmp_val), new_possibles)

        # at this point we know the first thing in new_possibles is prime (or
        # there isn't anything in new_possibles)
        cmp_val = next_or_none(new_possibles)
        while cmp_val is not None and cmp_val ** 2 <= n:
            # print "Appending: %d" % cmp_val
            self.primeset.append(cmp_val)
            # just using cmp_val directly in a lambda causes weird behavior,
            # because the filter is applied lazily, so the value of cmp_val has changed
            # by the time it's actually used. using partial evaluates cmp_val to create
            # a new function. kind of weird, I've never seen this happen in a language
            # other than javascript before
            new_possibles = ifilter(partial(not_divisible_by, cmp_val), new_possibles)
            # print "New Possibles: %s" % str(new_possibles)
            cmp_val = next_or_none(new_possibles)

        self.primeset.extend(new_possibles)
        # if we get here, we should have all the primes up to n
        self.__upto = n
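The comment about partial() in compute_upto describes Python's late-binding closures: a lambda created in a loop looks up the loop variable when the lazy ifilter finally runs, not when the lambda was created. A tiny standalone illustration (Python 2; not_div is a hypothetical helper standing in for not_divisible_by):

from functools import partial
from itertools import ifilter

def not_div(d, x):
    # True when x is not a multiple of d
    return x % d != 0

nums = range(2, 20)

# late binding: by the time the filters run, d is 3 for both of them,
# so multiples of 2 are never removed
late = nums
for d in [2, 3]:
    late = ifilter(lambda x: x % d != 0, late)
print list(late)   # [2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19]

# partial() freezes the current d into each filter, giving the intended result
bound = nums
for d in [2, 3]:
    bound = ifilter(partial(not_div, d), bound)
print list(bound)  # [5, 7, 11, 13, 17, 19]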
def partition(pred, iterable):
    "Use a predicate to partition entries into false entries and true entries"
    # partition(is_odd, range(10)) --> 0 2 4 6 8  and  1 3 5 7 9
    t1, t2 = itertools.tee(iterable)
    return itertools.ifilterfalse(pred, t1), itertools.ifilter(pred, t2)
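A minimal usage sketch for the partition recipe above (Python 2, where ifilter/ifilterfalse live in itertools; the is_odd helper is hypothetical and defined here only for illustration):

import itertools

def is_odd(n):
    # predicate used to split the range into evens and odds
    return n % 2 == 1

evens, odds = partition(is_odd, range(10))
print list(evens)  # [0, 2, 4, 6, 8]
print list(odds)   # [1, 3, 5, 7, 9]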
def open_filter_load(pattern='*.root', filter_keyfunc=None):
    wrps = dir_content(pattern)
    wrps = itertools.ifilter(filter_keyfunc, wrps)
    wrps = load(wrps)
    return wrps
elif x.count('out of memory'): writeline = False printLog('ERROR: Omitting line from table - problem is too large') #speedStr = 'GB/s' #if args.routine == 'stablesort': # speedStr = 'MKeys/s' #if args.routine == 'sort': # speedStr = 'MKeys/s' #speedStr = 'GB/s' speedStr = 'MKeys/s' if writeline: try: output = itertools.ifilter(lambda x: x.count(speedStr), output) output = list(itertools.islice(output, None)) thisResult = re.search('\d+\.*\d*e*-*\d*$', output[-1]) thisResult = float(thisResult.group(0)) thisResult = (params.x, params.device, params.precision, params.label, thisResult) outputRow = '' for x in thisResult: outputRow = outputRow + str(x) + ',' outputRow = outputRow.rstrip(',') table.write(outputRow + '\n') table.flush() except: printLog('ERROR: Exception occurs in GFLOP parsing') else:
def count(self, value):
    return sum(
        [1 for _ in itertools.ifilter(lambda v: v == value, iter(self))])
exclude_unit_path = args.exclude_unit_dir + "_".join( coprus_and_lang) + ".tsv" sys.stderr.write('reading ignore unit ids: %s\n' % exclude_unit_path) exclude_unit_ids = get_unit_id_set(exclude_unit_path) else: exclude_unit_ids = set() annotations += data.load_unambiguous_annotations( ssc_file, exclude_unit_ids) # Filtering out annotations that have multiple groups sys.stderr.write('filtering out multi-group annotations\n') annotations = list( itertools.ifilter(lambda x: isinstance(x.get_group_number(), int), annotations)) classifier = models.OptionAwareNaiveBayesLeftRightCutoff(window_size=5, cutoff=9) sys.stderr.write('training classifier\n') classifier.train(annotations) sys.stderr.write('processing probabilities\n') # If exclude folder is given, find a list of exclude units for the ANNOTATED # file and only leave those if args.exclude_unit_dir: coprus_and_lang = get_corpus_and_language(args.annotate) exclude_unit_path = args.exclude_unit_dir + "_".join( coprus_and_lang) + ".tsv"
def region_mask(cube, region_name):
    # mask cube to country
    import cartopy.io.shapereader as shpreader
    import itertools
    from iris.analysis.geometry import geometry_area_weights
    import numpy.ma as ma

    ### Guess bounds if currently not specified
    if cube.coord('latitude').bounds == None:
        cube.coord('latitude').guess_bounds()
    if cube.coord('longitude').bounds == None:
        cube.coord('longitude').guess_bounds()

    # get countries (resolution = 10m, 50m, 110m)
    shpfilename = shpreader.natural_earth(category='cultural',
                                          name='admin_0_countries',
                                          resolution='110m')
    reader = shpreader.Reader(shpfilename)

    # list available attributes
    all_countries = reader.records()
    country = next(all_countries)
    # print(country.attributes.keys())

    # get all values of an attribute
    key = 'name_long'
    values = set()
    all_countries = reader.records()
    for country in all_countries:
        values.add(country.attributes[key])
    # print( key+': '+ ', '.join(values) )

    # extract countries matching criteria - is there an easier way???
    country_crit = lambda country: country.attributes['name_long'] == region_name  ## e.g., 'China'
    # country_crit = lambda country: country.attributes['continent'] == 'Asia'
    # country_crit = lambda country: country.attributes['region_un'] == 'Asia'
    # country_crit = lambda country: country.attributes['subregion'] == 'Eastern Asia'

    all_countries = reader.records()
    countries = itertools.ifilter(country_crit, all_countries)

    # work out area weights of single field's intersection with selected countries
    # !!! need to make generic (get first field from cube)
    country = next(countries)
    print('Getting field intersection area with ' + country.attributes['name_long'])
    area_weights = geometry_area_weights(cube, country.geometry)

    for country in countries:
        print('Getting field intersection area with ' + country.attributes['name_long'])
        area_weights += geometry_area_weights(cube, country.geometry)

    # create a mask from the area weights
    mask = np.where(area_weights > 0, False, True)

    masked_cube = cube.copy()
    # NB: this combines the mask and the data's existing mask as required
    masked_cube.data = ma.array(masked_cube.data, mask=mask)

    return masked_cube
def main(): if len(sys.argv) < 3: sys.exit("use: %s gff variants" % sys.argv[0]) # load all the annotations, because pybedtools is broken.. annotations = pybedtools.BedTool(sys.argv[1]) annotations._isbam = False an_tabix = pysam.Tabixfile(sys.argv[1]) variants = vcf.Reader(filename=sys.argv[2]) print_header() # for each mrna pick all exons with Target attribute # and all variants in the mrna # for each variant, if in exon, output with exact coords, # otherwise output with NA and mrna block def has_target(i): return 'Target' in i.attrs def exon_target(i): target = i.attrs['Target'] chrom, start, end, _ = target.split(" ", 4) return pybedtools.Interval(chrom, int(start), int(end)) def is_type(i, itype): return i.fields[2] == itype def is_source(i, src): return i.fields[1] == src for mrna in itertools.ifilter(lambda x: is_type(x, 'mRNA'), annotations): feats = fetch_intervals(an_tabix, mrna) exons0 = filter(has_target, filter(lambda x: is_type(x, 'exon'), feats)) # pick only the exons mapping to the putative chromosome exons = filter(lambda x: exon_target(x).chrom == mrna.chrom, exons0) # no exons means no chromosome information, we don't need such variants if not exons: continue # use features from mvz pipeline mvz = filter(lambda x: is_source(x, 'mvz-annot'), feats) # pick the exon range exon_min = min(exon_target(x).start for x in exons) exon_max = max(exon_target(x).end for x in exons) mvars = list(variants.fetch(mrna.chrom, mrna.start, mrna.end)) for var in mvars: f = list(pybed_find_features(exons, var.CHROM, var.POS)) # if found a mapped exon, translate the coordinates # relative to the exon start if f: # pick the lowest exon minex = f[0] refpos = "%d" % (exon_target(minex).start + var.POS - minex.start) else: refpos = "NA" # list of feature types from mvz annotation pipeline (CDS, 3utr, 5utr..) mvzf = list(pybed_find_features(mvz, var.CHROM, var.POS)) if mvzf: mvz_type = ",".join(i.fields[2] for i in mvzf) else: mvz_type = "NA" # try to pick mvz CDS feature and extract the Name(s) from it mvz_cds = filter(lambda x: is_type(x, 'CDS'), mvzf) if mvz_cds: try: mvz_name = ",".join(i.attrs['Name'] for i in mvz_cds if 'Name' in i.attrs) except: print >> sys.stderr, "error at var:", var.CHROM, var.POS else: mvz_name = "NA" print "\t".join( map(str, [ var.CHROM, var.POS, var.QUAL, var.FILTER, var.var_type, var.var_subtype, var.INFO['DP'], "\t".join(str(x) for x in var.INFO['DP4']), mrna.name, mvz_name, mrna.end - mrna.start, exon_min, exon_max, refpos, mvz_type, ]))
def slaves(self, fltr=""):
    return list(
        map(
            lambda x: slave.MesosSlave(x),
            itertools.ifilter(lambda x: fltr in x["id"], self.state["slaves"])))
def mindmap_to_blog(mindmap, do_cleanup=False): #=========================================================== # cleanup previous markdown files #=========================================================== # {{{ if do_cleanup: print '\n## cleanup old markdown files created by freeplane.py blog mode' dpath = os.path.dirname(mindmap.fpath) for root, ls_dname, ls_fname in os.walk(dpath): for fname in ls_fname: if (fname.endswith('.md')): # is Markdown file fpath = os.path.join(root, fname) with open(fpath, 'r') as f: f.seek(-2 * len(blog_mode_footnote), os.SEEK_END) last = f.read().strip() if last.endswith(blog_mode_footnote): # is generated, so remove the markdown file print '### remove file ' + fpath os.remove(fpath) # }}} #=========================================================== # find blog mode settings #=========================================================== # {{{ print '\n## analyze blog mode settings' setting_root = next(itertools.ifilter(lambda x: x.core.lower() == 'blog mode settings', mindmap.root.ls_sub), None) assert (setting_root != None), 'Error: cannot find "blog mode settings" node' #------------------------------------------------------- # node type & link type node_type = dict() link_type = dict() for node in itertools.islice(iter(setting_root), 1, None): type_name = node.core.lower() if (type_name == 'special nodes'): continue if (type_name == 'link file types'): for (keyword, value) in node.attribute.items(): link_type[keyword] = value.split() continue try: node_type[type_name].append(node) except KeyError: node_type[type_name] = [node] print '### node type' for (type_name, ls_node) in node_type.items(): s = '%s = ' % type_name for node in ls_node: s += node.format + '|' print s.strip('|') print '### link type' for (type_name, ls_pattern) in link_type.items(): s = '%s = ' % type_name for pattern in ls_pattern: s += pattern + '|' print s.strip('|') #------------------------------------------------------- # article's default meta-data article_setting = next(itertools.ifilter(lambda n: n.core.lower() == 'article', setting_root), None) assert article_setting != None, 'Error: cannot find "article" setting node' default_metadata = dict() for (key, value) in article_setting.attribute.items(): default_metadata[key.lower()] = value print '### default meta-data' for (key, value) in default_metadata.items(): print '%s = %s' % (key, value) # }}} #=========================================================== # set node type according to their format matching with blog mode settings #=========================================================== # {{{ print '\n## match node format' # node_type for node in iter(mindmap.root): node.type = '' for (type_name, ls_node_setting) in node_type.items(): for node_setting in ls_node_setting: if (node.format == node_setting.format): node.type = type_name break if len(node.type) > 0: break if (node.type == ''): if node.core.startswith('- '): node.type = 'list' if (node.parent.type != 'list'): node.list_level = 1 else: node.list_level = node.parent.list_level + 1 for node in iter(setting_root): node.type = '' mindmap.root.type = '' # link_type for node in itertools.ifilter(lambda n: len(n.link) > 0, mindmap.root): node.link_type = '' for (type_name, ls_link_type_pattern) in link_type.items(): for link_type_pattern in ls_link_type_pattern: if (fnmatch.fnmatch(node.core, link_type_pattern)): node.link_type = type_name break if len(node.link_type) > 0: break # }}} #=========================================================== # analyze article 
#=========================================================== # {{{ print '\n## analyze blog articles' for node in itertools.ifilter(lambda n: n.type == 'article', mindmap.root): print '\n### analyze article:\n' + node.core.encode('utf-8') #------------------------------------------------------- # article file path dpath = os.path.dirname(mindmap.fpath) if node.parent.type == 'directory': directory = node.parent.core.replace(' ', '-').replace(':', '..') dpath = os.path.join(dpath, directory) #------------------------------------------------------- # article meta-data metadata = dict() for (key, value) in node.attribute.items(): metadata[key.lower()] = value metadata['title'] = node.core metadata['category'] = directory.replace(' ', '-') if ('slug' not in metadata.keys()): metadata['slug'] = metadata['title'].lower().replace(' ', '-') # set default meta-data for (keyword, value) in default_metadata.items(): if keyword not in metadata.keys(): metadata[keyword] = value if ('modified' not in metadata.keys()) or (metadata['modified'] == ''): if ('created' in metadata.keys()): metadata['modified'] = metadata['created'] if ('created' in metadata.keys()): metadata['date'] = metadata['created'] fname = metadata['slug'] + '.md' fpath = os.path.join(dpath, fname) #------------------------------------------------------- # write article if not os.path.isdir(dpath): os.mkdir(dpath) # title content = 'title: ' + metadata['title'] + '\n' # meta-data for (key, value) in metadata.items(): if (key == 'title'): continue content += '%s: %s\n' % (key, value) content += '\n' #------------------------------------------------------- # sub node content {{{ for n in itertools.islice(iter(node), 1, None): if n.type == 'skip': continue if n.type == 'comment': map(lambda m: setattr(m, 'type', 'comment'), n.ls_sub) continue # deal with links if len(n.link) > 0: # solve relative link if n.link.startswith('http://'): # http link link = n.link else: # local files # copy local files to directory it supposed to be in: "<directory>/<slug>/", and modify the link in original mindmap file exp_link = directory + '/' + metadata['slug'] + '/' + n.core.replace(' ', '_') if (n.link != exp_link): old_fpath = os.path.join(os.path.dirname(mindmap.fpath), n.link).replace(r'%20', r' ') new_fpath = os.path.join(os.path.dirname(mindmap.fpath), exp_link) if (os.path.dirname(n.link) == os.path.dirname(exp_link)): # just filename is different, rename print 'Info: file (%s) is renamed to (%s)' % (old_fpath, new_fpath) os.rename(old_fpath, new_fpath) else: # copy print 'Info: file (%s) is copied to (%s)' % (old_fpath, new_fpath) if not os.path.isdir(os.path.dirname(new_fpath)): os.mkdir(os.path.dirname(new_fpath)) shutil.copyfile(old_fpath, new_fpath) # change the link mindmap.is_changed = True before = 'LINK="' + n.link + '"' after = 'LINK="' + exp_link + '"' mindmap.ls_change.append((before, after)) link = metadata['slug'] + '/' + n.core.replace(' ', '_') if n.link_type == 'image': content += '![%s](%s)\n' % (n.core, link) else: content += '[%s](%s)\n' % (n.core, link) continue # deal with special nodes prefix = '' suffix = '' if n.type == 'section': prefix = '# ' elif n.type == 'subsection': prefix = '## ' elif n.type == 'subsubsection': prefix = '### ' elif n.type == 'paragraph': prefix = '#### ' elif n.type == 'subparagraph': prefix = '##### ' elif n.type == 'code': prefix = '~~~\n' suffix = '\n~~~' elif n.type == 'list': prefix = ' ' * (n.list_level - 1) suffix = '' content += prefix + n.core + suffix + '\n' if len(n.note) > 0: content += 
'\n' + n.note + '\n' # }}} content += blog_mode_footnote #------------------------------------------------------- # write the markdown file {{{ f = open(fpath, 'w') print >> f, content.encode('utf-8') f.close() print '### wrote to ' + fpath # }}} # }}} #=========================================================== # modify original mindmap and backup #=========================================================== # {{{ if len(mindmap.ls_change) == 0: pass else: print '\n## change mindmap with backup' # backup prefix = datetime.datetime.now().strftime('%Y%m%d_%H%M%S.') bak_fpath = os.path.join(os.path.dirname(mindmap.fpath), 'bak', prefix + os.path.basename(mindmap.fpath)) if not os.path.isdir(os.path.dirname(bak_fpath)): os.mkdir(os.path.dirname(bak_fpath)) shutil.copyfile(mindmap.fpath, bak_fpath) print '### mindmap file backup at %s' % bak_fpath # replace mindmap with open(mindmap.fpath, 'r') as f: ls_lines = f.readlines() for (before, after) in mindmap.ls_change: print '### (%s) >> (%s)' % (before, after) for (i, line) in enumerate(ls_lines): ls_lines[i] = line.replace(before, after) with open(mindmap.fpath, 'w') as f: f.writelines(ls_lines) # }}} #=========================================================== # cleanup not used attachment dir #=========================================================== # {{{ if (do_cleanup): print '\n## cleanup not used attachment dir' dpath = os.path.dirname(mindmap.fpath) for root, ls_dname, ls_fname in os.walk(dpath): # skip the first level if (root == dpath) or (os.path.basename(root) == 'bak'): continue st_dname = set(ls_dname) st_slug = set(map(lambda x: x[:-3], ls_fname)) st_not_used = st_dname - st_slug for dname in st_not_used: dpath = os.path.join(root, dname) print '### remove dir ' + dpath shutil.rmtree(dpath)
def _sieve(stream):
    # just for fun; doesn't work over a few hundred
    val = stream.next()
    yield val
    for x in ifilter(lambda x: x % val != 0, _sieve(stream)):
        yield x
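A small driver for the recursive sieve above, as a sketch (Python 2: count(2) yields an endless integer stream with a .next() method, and each recursion level filters out multiples of the value it drew; recursion depth limits it to small primes, as the comment notes):

from itertools import count, ifilter, islice

# first ten primes produced by the recursive sieve
print list(islice(_sieve(count(2)), 10))  # [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]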
def filter_out_dummy(iterable):
    return itertools.ifilter(lambda x: x != dummy_fill_value,
                             iterable)
def compute(self, split):
    if self.err < 1e-8:
        return itertools.ifilter(self.func, self.prev.iterator(split))
    return self._compute_with_error(split)
def slaves(self, fltr=""):
    return list(
        map(
            lambda x: slave.MesosSlave(self.config, x),
            itertools.ifilter(lambda x: fltr == x['id'], self.state['slaves'])))
def ParseResults(ycsb_result_string, data_type='histogram'): """Parse YCSB results. Example input for histogram datatype: YCSB Client 0.1 Command line: -db com.yahoo.ycsb.db.HBaseClient -P /tmp/pkb/workloada [OVERALL], RunTime(ms), 1800413.0 [OVERALL], Throughput(ops/sec), 2740.503428935472 [UPDATE], Operations, 2468054 [UPDATE], AverageLatency(us), 2218.8513395574005 [UPDATE], MinLatency(us), 554 [UPDATE], MaxLatency(us), 352634 [UPDATE], 95thPercentileLatency(ms), 4 [UPDATE], 99thPercentileLatency(ms), 7 [UPDATE], Return=0, 2468054 [UPDATE], 0, 398998 [UPDATE], 1, 1015682 [UPDATE], 2, 532078 ... Example input for hdrhistogram datatype: YCSB Client 0.12.0 Command line: -db com.yahoo.ycsb.db.RedisClient -P /opt/pkb/workloadb [OVERALL], RunTime(ms), 29770.0 [OVERALL], Throughput(ops/sec), 33590.86328518643 [UPDATE], Operations, 49856.0 [UPDATE], AverageLatency(us), 1478.0115532734276 [UPDATE], MinLatency(us), 312.0 [UPDATE], MaxLatency(us), 24623.0 [UPDATE], 95thPercentileLatency(us), 3501.0 [UPDATE], 99thPercentileLatency(us), 6747.0 [UPDATE], Return=OK, 49856 ... Example input for ycsb version after 0.13.0: ... Command line: -db com.yahoo.ycsb.db.HBaseClient10 ... -load YCSB Client 0.14.0 Loading workload... Starting test. ... [OVERALL], RunTime(ms), 11411 [OVERALL], Throughput(ops/sec), 8763.473841030585 [INSERT], Operations, 100000 [INSERT], AverageLatency(us), 74.92 [INSERT], MinLatency(us), 5 [INSERT], MaxLatency(us), 98495 [INSERT], 95thPercentileLatency(us), 42 [INSERT], 99thPercentileLatency(us), 1411 [INSERT], Return=OK, 100000 ... Example input for timeseries datatype: ... [OVERALL], RunTime(ms), 240007.0 [OVERALL], Throughput(ops/sec), 10664.605615669543 ... [READ], Operations, 1279253 [READ], AverageLatency(us), 3002.7057071587874 [READ], MinLatency(us), 63 [READ], MaxLatency(us), 93584 [READ], Return=OK, 1279281 [READ], 0, 528.6142757498257 [READ], 500, 360.95347448674966 [READ], 1000, 667.7379547689283 [READ], 1500, 731.5389357265888 [READ], 2000, 778.7992281717318 ... Args: ycsb_result_string: str. Text output from YCSB. data_type: Either 'histogram' or 'timeseries' or 'hdrhistogram'. 'histogram' and 'hdrhistogram' datasets are in the same format, with the difference being lacking the (millisec, count) histogram component. Hence are parsed similarly. Returns: A dictionary with keys: client: containing YCSB version information. command_line: Command line executed. groups: list of operation group descriptions, each with schema: group: group name (e.g., update, insert, overall) statistics: dict mapping from statistic name to value histogram: list of (ms_lower_bound, count) tuples, e.g.: [(0, 530), (19, 1)] indicates that 530 ops took between 0ms and 1ms, and 1 took between 19ms and 20ms. Empty bins are not reported. Raises: IOError: If the results contained unexpected lines. """ # TODO: YCSB 0.9.0 output client and command line string to stderr, so # we need to support it in the future. lines = [] client_string = 'YCSB' command_line = 'unknown' fp = io.BytesIO(ycsb_result_string) result_string = next(fp).strip() def IsHeadOfResults(line): return line.startswith('[OVERALL]') while not IsHeadOfResults(result_string): if result_string.startswith('YCSB Client 0.'): client_string = result_string if result_string.startswith('Command line:'): command_line = result_string try: result_string = next(fp).strip() except StopIteration: raise IOError( 'Could not parse YCSB output: {}'.format(ycsb_result_string)) if result_string.startswith('[OVERALL]'): # YCSB > 0.7.0. 
lines.append(result_string) else: # Received unexpected header raise IOError('Unexpected header: {0}'.format(client_string)) # Some databases print additional output to stdout. # YCSB results start with [<OPERATION_NAME>]; # filter to just those lines. def LineFilter(line): return re.search(r'^\[[A-Z]+\]', line) is not None lines = itertools.chain(lines, itertools.ifilter(LineFilter, fp)) r = csv.reader(lines) by_operation = itertools.groupby(r, operator.itemgetter(0)) result = collections.OrderedDict([('client', client_string), ('command_line', command_line), ('groups', collections.OrderedDict())]) for operation, lines in by_operation: operation = operation[1:-1].lower() if operation == 'cleanup': continue op_result = {'group': operation, data_type: [], 'statistics': {}} latency_unit = 'ms' for _, name, val in lines: name = name.strip() val = val.strip() # Drop ">" from ">1000" if name.startswith('>'): name = name[1:] val = float(val) if '.' in val or 'nan' in val.lower() else int( val) if name.isdigit(): if val: if data_type == TIMESERIES and latency_unit == 'us': val /= 1000.0 op_result[data_type].append((int(name), val)) else: if '(us)' in name: name = name.replace('(us)', '(ms)') val /= 1000.0 latency_unit = 'us' op_result['statistics'][name] = val result['groups'][operation] = op_result return result
def processes(self, workflow=None, tag=None):
    return ifilter(
        lambda p: (p.workflow == workflow or not workflow) and (tag in p.tags or not tag),
        self.running_processes.values())
def multi_substitute(word, rules):
    """ Apply multiple regex rules to 'word'
    http://code.activestate.com/recipes/
    576710-multi-regex-single-pass-replace-of-multiple-regexe/
    """
    flags = rules[0]['flags']

    # Create a combined regex from the rules
    tuples = ((p, r['match']) for p, r in enumerate(rules))
    regexes = ('(?P<match_%i>%s)' % (p, r) for p, r in tuples)
    pattern = '|'.join(regexes)
    regex = re.compile(pattern, flags)
    resplit = re.compile('\$(\d+)')

    # For each match, look-up corresponding replace value in dictionary
    rules_in_series = ifilter(itemgetter('series'), rules)
    rules_in_parallel = (r for r in rules if not r['series'])

    try:
        has_parallel = [rules_in_parallel.next()]
    except StopIteration:
        has_parallel = []

    # print('================')
    # pprint(rules)
    # print('word:', word)
    # print('pattern', pattern)
    # print('flags', flags)

    for _ in chain(rules_in_series, has_parallel):
        # print('~~~~~~~~~~~~~~~~')
        # print('new round')
        # print('word:', word)
        # found = list(regex.finditer(word))
        # matchitems = [match.groupdict().items() for match in found]
        # pprint(matchitems)
        prev_name = None
        prev_is_series = None
        i = 0

        for match in regex.finditer(word):
            item = ifilter(itemgetter(1), match.groupdict().iteritems()).next()

            # print('----------------')
            # print('groupdict:', match.groupdict().items())
            # print('item:', item)

            if not item:
                continue

            name = item[0]
            rule = rules[int(name[6:])]
            series = rule.get('series')
            kwargs = {'count': rule['count'], 'series': series}
            is_previous = name is prev_name
            singlematch = kwargs['count'] is 1
            is_series = prev_is_series or kwargs['series']
            isnt_previous = bool(prev_name) and not is_previous

            if (is_previous and singlematch) or (isnt_previous and is_series):
                continue

            prev_name = name
            prev_is_series = series

            if resplit.findall(rule['replace']):
                splits = resplit.split(rule['replace'])
                words = _gen_words(match, splits)
            else:
                splits = rule['replace']
                start = match.start() + i
                end = match.end() + i
                words = [word[:start], splits, word[end:]]
                i += rule['offset']

            # words = list(words)
            word = ''.join(words)

            # print('name:', name)
            # print('prereplace:', rule['replace'])
            # print('splits:', splits)
            # print('resplits:', resplit.findall(rule['replace']))
            # print('groups:', filter(None, match.groups()))
            # print('i:', i)
            # print('words:', words)
            # print('range:', match.start(), '-', match.end())
            # print('replace:', word)

    # print('substitution:', word)
    return word
def selectColumns(self, row):
    outRows = imap(
        lambda i: row[i],
        ifilter(lambda i: i in self._getTransColumns(), range(len(row))))
    return outRows
address_types = {ArrayType, StringType, FunctionType, AddressType}


def size_arrays_as_pointers(ctype, overrides=()):
    return size(
        ctype,
        overrides=dict(chain(
            izip(address_types, repeat(size(void_pointer_type))),
            getattr(overrides, 'iteritems', lambda: overrides)()
        ))
    )


# float word sizes
float_sizes_to_words = dict(izip(imap(word_type_sizes.__getitem__, float_names), float_names))

# CFloat types
float_ctypes = set(ifilter(lambda cls: issubclass(cls, FloatType), rules(numeric_type_size).iterkeys()))

# Convert CType to its size then convert that size to machine word type name
float_ctypes_word_types = dict(izip(
    float_ctypes,
    imap(float_sizes_to_words.__getitem__, imap(rules(numeric_type_size).__getitem__, float_ctypes))
))

strictly_unsigned_ctypes = set(
    ifilter(lambda cls: issubclass(cls, StrictlyUnsigned), rules(numeric_type_size).iterkeys())
) - float_ctypes  # just to be safe even though floats are strictly signed!

strictly_signed_ctypes = set(
    ifilter(lambda cls: issubclass(cls, StrictlySigned), rules(numeric_type_size).iterkeys())
) - float_ctypes

integral_ctypes = set(ifilter(lambda cls: issubclass(cls, IntegralType), rules(numeric_type_size).iterkeys()))

unsigned_ctypes_to_words = dict(izip(imap(word_type_sizes.__getitem__, word_names), word_names))
url = 'https://api.import.io/store/data/c1ea6c12-c034-4b99-9252-81a5395c1701/_query?input/webpage/url=http://www.groupon.com/browse/' + city + '?context=local&' + userkeys return url backup_page_list = [] deals_list = [] page_data_list = [] # with open('Updated_sample_groupon_page_data_test.json', 'w') as jsonfile: # json.dump(page_data_list, jsonfile, encoding='utf8') for c in cities[21:23]: print c try: trial = [json.load(urlopen(total_page_extractor(c)))['results'] for each in range(0, 10)] trial = itertools.ifilter(None, trial).next() for page in trial: total_pages = int(page['total_page_numb/_text']) print "%s total pages for %s" % (total_pages, c) # print type(total_pages) # print trial for s in range(1, total_pages + 1): city = c page_number = str(s) # print page_number deal_list_crawler = 'https://api.import.io/store/data/affd236e-be39-4952-9100-d7f3564add10/_query?input/webpage/url=http://www.groupon.com/browse%2F' + city + '%3Fcontext%3Dlocal%26page%3D' + page_number + '&' + userkeys result = json.load(urlopen(deal_list_crawler))['results'] while True: if result != []: for d in result: del d['all_deals_links/_source']
import itertools


def not_none(it):
    return itertools.ifilter(None, it)

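# Quick check of not_none (illustrative only): ifilter(None, ...) drops every
# falsy value, not just None, which is worth keeping in mind for 0 and ''.
print list(not_none([1, None, 0, 'a', '', 2]))   # [1, 'a', 2]
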
import itertools


def partition(pred, iterable):
    t1, t2 = itertools.tee(iterable)
    return (list(itertools.ifilterfalse(pred, t1)),
            list(itertools.ifilter(pred, t2)))

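# Example of partition (illustrative): the predicate selects the second list and
# its complement the first, mirroring the itertools recipe it is based on.
print partition(lambda n: n % 2, range(6))   # ([0, 2, 4], [1, 3, 5])
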
def all_tids(self):
    free = self.free_list()
    n = len(self._chunks) * DEFAULT_CHUNK_SIZE
    return ifilter(lambda x: x not in free,
                   imap(self._make_tid, xrange(n)))

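# The same filter restated on plain data (DEFAULT_CHUNK_SIZE, the free list and
# the tid format below are assumptions made for the example): generate every
# candidate id, then drop the ones currently on the free list.
from itertools import ifilter, imap

DEFAULT_CHUNK_SIZE = 4
free = ['tid-1', 'tid-3']
n = 1 * DEFAULT_CHUNK_SIZE   # one chunk
print list(ifilter(lambda x: x not in free,
                   imap(lambda k: 'tid-%d' % k, xrange(n))))   # ['tid-0', 'tid-2']
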
def iter_blocks(self):
    return ifilter(methodcaller('is_block'), self.get_elements())

def iter_connections(self):
    return ifilter(methodcaller('is_connection'), self.get_elements())

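# The two methods above share one pattern: filter a mixed element list with a
# boolean method via operator.methodcaller.  A stand-alone illustration (the
# Block and Connection classes are invented for the example):
from itertools import ifilter
from operator import methodcaller

class Block(object):
    def is_block(self): return True
    def is_connection(self): return False

class Connection(object):
    def is_block(self): return False
    def is_connection(self): return True

elements = [Block(), Connection(), Block()]
print len(list(ifilter(methodcaller('is_block'), elements)))        # 2
print len(list(ifilter(methodcaller('is_connection'), elements)))   # 1
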
from itertools import ifilter


def first(condition, seq):
    try:
        return next(ifilter(condition, seq))
    except StopIteration:
        return None

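# first() in action (illustrative): it returns the first element satisfying the
# condition, or None once the filtered iterator is exhausted.
print first(lambda n: n > 10, [3, 14, 15])    # 14
print first(lambda n: n > 100, [3, 14, 15])   # None
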
        if isValid(sumNs, mcmTot, p):
            yield s


## ****************** MAIN ******************
limit = 80
cands = range(2, limit + 1)
denoms = [x**2 for x in cands]
mcmTot = mcm(denoms)
print mcmTot

## Global variable, used everywhere. The cases 0 and 1 are manually set.
val = [None, mcmTot] + [mcmTot // d for d in denoms]

## Removing candidates that don't appear in any combination:
sRemove = set()
for p in ifilter(isPrime, xrange(5, limit + 1)):
    sFound = set(range(p, limit + 1, p))
    for s in possibleP(p):
        sFound.difference_update(p * el for el in s)
    sRemove.update(sFound)

for el in sRemove:
    cands.remove(el)
print "Candidates: %s" % cands

## Now, divide the candidates into two roughly equal halves:
denoms1 = [x**2 for x in cands[:len(cands) // 2]]
denoms2 = [x**2 for x in cands[len(cands) // 2:]]
mcm_1 = mcm(denoms1)
mcm_2 = mcm(denoms2)
mcmTot = mcm([mcm_1, mcm_2])
mcmTot2 = mcmTot // 2

def plotFromDataFile():
    data = []

    """
    read in table(s) from file(s)
    """
    for thisFile in args.datafile:
        if not os.path.isfile(thisFile):
            print 'No file with the name \'{}\' exists. Please indicate another filename.'.format(thisFile)
            quit()

        results = open(thisFile, 'r')
        results_contents = results.read()
        results_contents = results_contents.rstrip().split('\n')

        firstRow = results_contents.pop(0)
        print firstRow
        print blas_table_header()
        print firstRow.rstrip() == blas_table_header()
        if firstRow.rstrip() != blas_table_header():
            print 'ERROR: input file \'{}\' does not match expected format.'.format(thisFile)
            quit()

        for row in results_contents:
            row = row.split(',')
            row = TableRow(
                BlasTestCombination(row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7],
                                    row[8], row[9], row[10], row[11], row[12], row[13], row[14],
                                    row[15], row[16], row[17][1:], row[17][0], row[18], row[19], row[20]),
                row[21])
            data.append(
                BlasGraphPoint(row.parameters.sizem, row.parameters.sizen, row.parameters.sizek,
                               row.parameters.lda, row.parameters.ldb, row.parameters.ldc,
                               row.parameters.offa, row.parameters.offb, row.parameters.offc,
                               row.parameters.device, row.parameters.order,
                               row.parameters.transa, row.parameters.transb,
                               row.parameters.precision + row.parameters.function,
                               row.parameters.library, row.parameters.label, row.gflops))

    """
    data sanity check
    """
    # if multiple plotvalues have > 1 value among the data rows, the user must specify which to plot
    multiplePlotValues = []
    for option in plotvalues:
        values = []
        for point in data:
            values.append(getattr(point, option))
        multiplePlotValues.append(len(set(values)) > 1)
    if multiplePlotValues.count(True) > 1 and args.plot is None:
        print 'ERROR: more than one parameter of {} has multiple values. Please specify which parameter to plot with --plot'.format(plotvalues)
        quit()

    # if args.graphxaxis is not 'problemsize', the user should know that the results might be strange
    #if args.graphxaxis != 'problemsize':
    #    xaxisvalueSet = []
    #    for option in xaxisvalues:
    #        if option != 'problemsize':
    #            values = []
    #            for point in data:
    #                values.append(getattr(point, option))
    #            xaxisvalueSet.append(len(set(values)) > 1)
    #    if xaxisvalueSet.count(True) > 1:
    #        print 'WARNING: more than one parameter of {} is varied. unexpected results may occur. please double check your graphs for accuracy.'.format(xaxisvalues)

    # multiple rows should not have the same input values
    #pointInputs = []
    #for point in data:
    #    pointInputs.append(point.__str__().split(';')[0])
    #if len(set(pointInputs)) != len(data):
    #    print 'ERROR: imported table has duplicate rows with identical input parameters'
    #    quit()

    """
    figure out if we have multiple plots on this graph (and what they should be)
    """
    if args.plot is not None:
        multiplePlots = args.plot
    elif multiplePlotValues.count(True) == 1 and plotvalues[multiplePlotValues.index(True)] != 'sizek':
        # we don't ever want to default to sizek, because it's probably going to vary for most plots
        # we'll require the user to explicitly request multiple plots on sizek if necessary
        multiplePlots = plotvalues[multiplePlotValues.index(True)]
    else:
        # default to device if none of the options to plot have multiple values
        multiplePlots = 'device'

    """
    assemble data for the graphs
    """
    data.sort(key=lambda row: int(getattr(row, args.graphxaxis)))

    # choose scale for x axis
    if args.xaxisscale is None:
        # user didn't specify. autodetect
        # (check the larger threshold first so the log10 branch is reachable)
        if int(getattr(data[len(data) - 1], args.graphxaxis)) > 10000:  # bigger numbers on x-axis
            args.xaxisscale = 'log10'
        elif int(getattr(data[len(data) - 1], args.graphxaxis)) > 2000:  # big numbers on x-axis
            args.xaxisscale = 'log2'
        else:  # small numbers on x-axis
            args.xaxisscale = 'linear'

    if args.xaxisscale == 'linear':
        plotkwargs = {}
        plottype = 'plot'
    elif args.xaxisscale == 'log2':
        plottype = 'semilogx'
        plotkwargs = {'basex': 2}
    elif args.xaxisscale == 'log10':
        plottype = 'semilogx'
        plotkwargs = {'basex': 10}
    else:
        print 'ERROR: invalid value for x-axis scale'
        quit()

    plots = set(getattr(row, multiplePlots) for row in data)

    class DataForOnePlot:
        def __init__(self, inlabel, inxdata, inydata):
            self.label = inlabel
            self.xdata = inxdata
            self.ydata = inydata

    dataForAllPlots = []
    for plot in plots:
        dataForThisPlot = itertools.ifilter(lambda x: getattr(x, multiplePlots) == plot, data)
        dataForThisPlot = list(itertools.islice(dataForThisPlot, None))
        #if args.graphxaxis == 'problemsize':
        #    xdata = [int(row.x) * int(row.y) * int(row.z) * int(row.batchsize) for row in dataForThisPlot]
        #else:
        xdata = [getattr(row, args.graphxaxis) for row in dataForThisPlot]
        ydata = [getattr(row, args.graphyaxis) for row in dataForThisPlot]
        dataForAllPlots.append(DataForOnePlot(plot, xdata, ydata))

    """
    assemble labels for the graph or use the user-specified ones
    """
    if args.graphtitle:
        # use the user selection
        title = args.graphtitle
    else:
        # autogen a lovely title
        title = 'Performance vs. ' + args.graphxaxis.capitalize()

    if args.xaxislabel:
        # use the user selection
        xaxislabel = args.xaxislabel
    else:
        # autogen a lovely x-axis label
        if args.graphxaxis == 'cachesize':
            units = '(bytes)'
        else:
            units = '(datapoints)'
        xaxislabel = args.graphxaxis + ' ' + units

    if args.yaxislabel:
        # use the user selection
        yaxislabel = args.yaxislabel
    else:
        # autogen a lovely y-axis label
        if args.graphyaxis == 'gflops':
            units = 'GFLOPS'
        yaxislabel = 'Performance (' + units + ')'

    """
    display a pretty graph
    """
    colors = ['k', 'y', 'm', 'c', 'r', 'b', 'g']

    for thisPlot in dataForAllPlots:
        getattr(pylab, plottype)(thisPlot.xdata, thisPlot.ydata,
                                 '{}.-'.format(colors.pop()),
                                 label=thisPlot.label, **plotkwargs)

    if len(dataForAllPlots) > 1:
        pylab.legend(loc='best')

    pylab.title(title)
    pylab.xlabel(xaxislabel)
    pylab.ylabel(yaxislabel)
    pylab.grid(True)

    if args.outputFilename is None:
        # if no pdf output is requested, spit the graph to the screen . . .
        pylab.show()
    else:
        # . . . otherwise, gimme gimme pdf
        #pdf = PdfPages(args.outputFilename)
        #pdf.savefig()
        #pdf.close()
        pylab.savefig(args.outputFilename, dpi=(1024 / 8))

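# The per-plot selection above boils down to: for each distinct value of one
# attribute, keep the rows carrying that value.  A reduced, runnable version of
# that ifilter pattern (the Row records below are made up for the example):
import itertools
from collections import namedtuple

Row = namedtuple('Row', 'device gflops')
rows = [Row('gpu', 900), Row('cpu', 120), Row('gpu', 950)]

for plot in set(getattr(row, 'device') for row in rows):
    selected = list(itertools.ifilter(lambda x: getattr(x, 'device') == plot, rows))
    print plot, [row.gflops for row in selected]
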
def _update_parameter_controls(self):
    for control in ifilter(None, self._parameter_controls or []):
        control.set_channel(self._bank_index)
    return

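# _update_parameter_controls relies on two guards worth noting: "or []" protects
# against the whole collection being None, while ifilter(None, ...) skips any
# None entries inside it.  Illustration on plain data:
from itertools import ifilter

controls = None
print list(ifilter(None, controls or []))   # []
controls = ['enc1', None, 'enc2']
print list(ifilter(None, controls or []))   # ['enc1', 'enc2']
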
def main():
    arg_parser = ArgumentParser(description='Build an XLS performance report.')

    arg_parser.add_argument('sheet_dirs', nargs='+', metavar='DIR',
                            help='directory containing perf test logs')
    arg_parser.add_argument('-o', '--output', metavar='XLS', default='report.xls',
                            help='name of output file')
    arg_parser.add_argument('-c', '--config', metavar='CONF',
                            help='global configuration file')
    arg_parser.add_argument('--include-unmatched', action='store_true',
                            help='include results from XML files that were not recognized by configuration matchers')
    arg_parser.add_argument('--show-times-per-pixel', action='store_true',
                            help='for tests that have an image size parameter, show per-pixel time, as well as total time')

    args = arg_parser.parse_args()

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

    if args.config is not None:
        with open(args.config) as global_conf_file:
            global_conf = ast.literal_eval(global_conf_file.read())
    else:
        global_conf = {}

    wb = xlwt.Workbook()

    for sheet_path in args.sheet_dirs:
        try:
            with open(os.path.join(sheet_path, 'sheet.conf')) as sheet_conf_file:
                sheet_conf = ast.literal_eval(sheet_conf_file.read())
        except IOError as ioe:
            if ioe.errno != errno.ENOENT:
                raise
            sheet_conf = {}
            logging.debug('no sheet.conf for %s', sheet_path)

        sheet_conf = dict(global_conf.items() + sheet_conf.items())

        config_names = sheet_conf.get('configurations', [])
        config_matchers = sheet_conf.get('configuration_matchers', [])

        collector = Collector(make_match_func(config_matchers), args.include_unmatched)

        for root, _, filenames in os.walk(sheet_path):
            logging.info('looking in %s', root)
            for filename in fnmatch.filter(filenames, '*.xml'):
                if os.path.normpath(sheet_path) == os.path.normpath(root):
                    default_conf = None
                else:
                    default_conf = os.path.relpath(root, sheet_path)
                collector.collect_from(os.path.join(root, filename), default_conf)

        config_names.extend(sorted(collector.extra_configurations - set(config_names)))

        sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path))))

        sheet_properties = sheet_conf.get('sheet_properties', [])

        sheet.write(0, 0, 'Properties:')
        sheet.write(0, 1, 'N/A' if len(sheet_properties) == 0 else
                    ' '.join(str(k) + '=' + repr(v) for (k, v) in sheet_properties))

        sheet.row(2).height = 800
        sheet.panes_frozen = True
        sheet.remove_splits = True

        sheet_comparisons = sheet_conf.get('comparisons', [])

        row = 2
        col = 0

        for (w, caption) in [
                (2500, 'Module'),
                (10000, 'Test'),
                (2000, 'Image\nwidth'),
                (2000, 'Image\nheight'),
                (2000, 'Data\ntype'),
                (7500, 'Other parameters')]:
            sheet.col(col).width = w
            if args.show_times_per_pixel:
                sheet.write_merge(row, row + 1, col, col, caption, header_style)
            else:
                sheet.write(row, col, caption, header_style)
            col += 1

        for config_name in config_names:
            if args.show_times_per_pixel:
                sheet.col(col).width = 3000
                sheet.col(col + 1).width = 3000
                sheet.write_merge(row, row, col, col + 1, config_name, header_style)
                sheet.write(row + 1, col, 'total, ms', subheader_style)
                sheet.write(row + 1, col + 1, 'per pixel, ns', subheader_style)
                col += 2
            else:
                sheet.col(col).width = 4000
                sheet.write(row, col, config_name, header_style)
                col += 1

        col += 1  # blank column between configurations and comparisons

        for comp in sheet_comparisons:
            sheet.col(col).width = 4000
            caption = comp['to'] + '\nvs\n' + comp['from']
            if args.show_times_per_pixel:
                sheet.write_merge(row, row + 1, col, col, caption, header_style)
            else:
                sheet.write(row, col, caption, header_style)
            col += 1

        row += 2 if args.show_times_per_pixel else 1

        sheet.horz_split_pos = row
        sheet.horz_split_first_visible = row

        module_colors = sheet_conf.get('module_colors', {})
        module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color))
                         for module, color in module_colors.iteritems()}

        for module, tests in sorted(collector.tests.iteritems()):
            for ((test, param), configs) in sorted(tests.iteritems()):
                sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style))
                sheet.write(row, 1, test)

                param_list = param[1:-1].split(', ') if param.startswith('(') and param.endswith(')') else [param]

                image_size = next(ifilter(re_image_size.match, param_list), None)
                if image_size is not None:
                    (image_width, image_height) = map(int, image_size.split('x', 1))
                    sheet.write(row, 2, image_width)
                    sheet.write(row, 3, image_height)
                    del param_list[param_list.index(image_size)]

                data_type = next(ifilter(re_data_type.match, param_list), None)
                if data_type is not None:
                    sheet.write(row, 4, data_type)
                    del param_list[param_list.index(data_type)]

                sheet.row(row).write(5, ' | '.join(param_list))

                col = 6

                for c in config_names:
                    if c in configs:
                        sheet.write(row, col, configs[c], time_style)
                    else:
                        sheet.write(row, col, None, no_time_style)
                    col += 1

                    if args.show_times_per_pixel:
                        sheet.write(row, col,
                                    xlwt.Formula('{0} * 1000000 / ({1} * {2})'.format(
                                        xlwt.Utils.rowcol_to_cell(row, col - 1),
                                        xlwt.Utils.rowcol_to_cell(row, 2),
                                        xlwt.Utils.rowcol_to_cell(row, 3)
                                    )),
                                    time_style)
                        col += 1

                col += 1  # blank column

                for comp in sheet_comparisons:
                    cmp_from = configs.get(comp["from"])
                    cmp_to = configs.get(comp["to"])

                    if isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number):
                        try:
                            speedup = cmp_from / cmp_to
                            sheet.write(row, col, speedup,
                                        good_speedup_style if speedup > 1.1 else
                                        bad_speedup_style if speedup < 0.9 else
                                        speedup_style)
                        except ArithmeticError as e:
                            sheet.write(row, col, None, error_speedup_style)
                    else:
                        sheet.write(row, col, None, no_speedup_style)

                    col += 1

                row += 1
                if row % 1000 == 0:
                    sheet.flush_row_data()

    wb.save(args.output)

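# The parameter parsing above uses next(ifilter(regex.match, seq), None) to pull
# out the first entry matching a pattern, if any.  The regex below is a stand-in
# for re_image_size / re_data_type, whose real definitions are not shown here.
import re
from itertools import ifilter

re_size_guess = re.compile(r'^\d+x\d+$')
params = ['1920x1080', '8UC1', 'BORDER_REPLICATE']

print next(ifilter(re_size_guess.match, params), None)              # '1920x1080'
print next(ifilter(re.compile(r'^none$').match, params), None)      # None
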
    G = WCFG()
    processed = set()

    def make_rules(lhs, start, end):
        # keep the memo key in (lhs, start, end) order, matching the add() below
        if (lhs, start, end) in processed:
            return
        processed.add((lhs, start, end))
        for item in agenda.itercomplete(lhs, start, end):
            G.add(get_intersected_rule(item))
            fsa_states = item.inner + (item.dot,)
            for i, sym in itertools.ifilter(lambda (_, s): is_nonterminal(s), enumerate(item.rule.rhs)):
                if (sym, fsa_states[i], fsa_states[i + 1]) not in processed:
                    # Nederhof does not perform this test, but in python it turned out crucial
                    make_rules(sym, fsa_states[i], fsa_states[i + 1])

    # create goal items
    for start, ends in agenda.itergenerating(root):
        if not fsa.is_initial(start):
            continue
        for end in itertools.ifilter(lambda q: fsa.is_final(q), ends):
            make_rules(root, start, end)
            final_weight = fsa.get_final_weight(end)
            G.add(Rule(make_symbol(goal, None, None),
                       [make_symbol(root, start, end)],
                       final_weight))

    return G

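# One portability note on the inner loop above: "lambda (_, s): is_nonterminal(s)"
# uses tuple-parameter unpacking, which only parses on Python 2.  An equivalent
# filter that works on both lines (is_nonterminal and the rhs list below are
# stand-ins for the example):
from itertools import ifilter

def is_nonterminal(sym):
    return sym.startswith('[')            # assumption made for this example

rhs = ['[NP]', 'saw', '[NP]']
print list(ifilter(lambda pair: is_nonterminal(pair[1]), enumerate(rhs)))
# [(0, '[NP]'), (2, '[NP]')]
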