def can_join_frame(self, f):
    # Assumes frames are filled in by device index. Otherwise breaks...
    # Will only be compared if signature (class, group) match.
    # Our concern is with data read/write only.
    #   False if write and earlier device isBH
    #   False if read and later device isBH
    #   False if isBH and earlier device is read
    #   False if isBH and later device is write
    #   Otherwise True
    haswrite = self.data is not None and \
        not isinstance(self.data, NoCareBitarray)
    hasread = self.read
    isBH = self.isBH
    for prim in islice(f, 0, self._device_index):
        if prim is not None:
            if (haswrite and prim.isBH) or \
               (self.isBH and prim.read):
                return False
    for prim in islice(f, self._device_index + 1, len(f)):
        if prim is not None:
            otherhaswrite = prim.data is not None and \
                not isinstance(prim.data, NoCareBitarray)
            if (hasread and prim.isBH) or \
               (self.isBH and otherhaswrite):
                return False
    return True
def _mock_fetch_uris(endpoint, resource_uri):
    raw_values = _get_raw_values()
    values = iter(raw_values)
    rest = islice(values, 20)
    rest_data = list(rest)
    offset = 0
    while rest_data:
        if offset == 0:
            # Match every request without "offset" in it.
            regexp_txt = "(?!.*offset)"
        else:
            regexp_txt = ".*&offset=%d.*" % offset
        regexp = re.compile(endpoint + regexp_txt)
        offset += 20
        if offset == 80:
            next_uri = None
        else:
            query = '/?limit=20&key=install&offset=%d'
            next_uri = resource_uri + query % offset
        HTTPretty.register_uri(
            HTTPretty.GET, regexp,
            body=json_dumps({'meta': {"limit": 20,
                                      "next": next_uri,
                                      "offset": 0,
                                      "previous": None,
                                      "total_count": len(raw_values)},
                             'objects': rest_data}))
        rest = islice(values, 20)
        rest_data = list(rest)
def transform_lines_to_nn_input(tokenized_dialog_lines, token_to_index): """ Splits lines (IterableSentences) and generates numpy arrays of token ids suitable for training. Doesn't store all lines in memory. """ x_data_iterator, y_data_iterator, iterator_for_len_calc = file_buffered_tee(tokenized_dialog_lines, 3) _logger.info('Iterating through lines to get number of elements in the dataset') n_dialogs = sum(1 for _ in iterator_for_len_calc) x_data_iterator = islice(x_data_iterator, 0, None, 2) y_data_iterator = islice(y_data_iterator, 1, None, 2) n_dialogs /= 2 y_data_iterator, y_data_iterator_for_context = file_buffered_tee(y_data_iterator) x_data_iterator = _get_x_data_iterator_with_context(x_data_iterator, y_data_iterator_for_context) _logger.info('Iterating through lines to get input matrix') x_ids = transform_contexts_to_token_ids( x_data_iterator, token_to_index, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE, max_contexts_num=n_dialogs) _logger.info('Iterating through lines to get output matrix') y_ids = transform_lines_to_token_ids( y_data_iterator, token_to_index, OUTPUT_SEQUENCE_LENGTH, n_dialogs, add_start_end=True) return x_ids, y_ids, n_dialogs
def setUp(self):
    # Instead of random points, use deterministic, pseudo-random Halton
    # sequences for repeatability sake.
    self.coords = zip(
        list(islice(halton(5), 20, 120)),
        list(islice(halton(7), 20, 120)),
    )
def _parse_set_weight_values(argvish): new_cmd_format, opts, args = validate_args(argvish) # We'll either parse the all-in-one-string format or the # --options format, # but not both. If both are specified, raise an error. try: devs = [] if not new_cmd_format: if len(args) % 2 != 0: print(Commands.set_weight.__doc__.strip()) exit(EXIT_ERROR) devs_and_weights = izip(islice(argvish, 0, len(argvish), 2), islice(argvish, 1, len(argvish), 2)) for devstr, weightstr in devs_and_weights: devs.extend(builder.search_devs( parse_search_value(devstr)) or []) weight = float(weightstr) _set_weight_values(devs, weight) else: if len(args) != 1: print(Commands.set_weight.__doc__.strip()) exit(EXIT_ERROR) devs.extend(builder.search_devs( parse_search_values_from_opts(opts)) or []) weight = float(args[0]) _set_weight_values(devs, weight) except ValueError as e: print(e) exit(EXIT_ERROR)
def test_numbered_symbols():
    ns = cse_main.numbered_symbols(prefix='y')
    assert list(itertools.islice(ns, 0, 10)) == [Symbol('y%s' % i) for i in range(0, 10)]
    ns = cse_main.numbered_symbols(prefix='y')
    assert list(itertools.islice(ns, 10, 20)) == [Symbol('y%s' % i) for i in range(10, 20)]
    ns = cse_main.numbered_symbols()
    assert list(itertools.islice(ns, 0, 10)) == [Symbol('x%s' % i) for i in range(0, 10)]
def test_islice(self):
    import itertools

    it = itertools.islice([], 0)
    raises(StopIteration, it.next)

    it = itertools.islice([1, 2, 3], 0)
    raises(StopIteration, it.next)

    it = itertools.islice([1, 2, 3, 4, 5], 3)
    for x in [1, 2, 3]:
        assert it.next() == x
    raises(StopIteration, it.next)

    it = itertools.islice([1, 2, 3, 4, 5], 1, 3)
    for x in [2, 3]:
        assert it.next() == x
    raises(StopIteration, it.next)

    it = itertools.islice([1, 2, 3, 4, 5], 0, 3, 2)
    for x in [1, 3]:
        assert it.next() == x
    raises(StopIteration, it.next)

    it = itertools.islice([1, 2, 3], 0, None)
    for x in [1, 2, 3]:
        assert it.next() == x
    raises(StopIteration, it.next)

    assert list(itertools.islice(xrange(100), 10, 3)) == []

    # new in 2.5: start=None or step=None
    assert list(itertools.islice(xrange(10), None)) == range(10)
    assert list(itertools.islice(xrange(10), None, None)) == range(10)
    assert list(itertools.islice(xrange(10), None, None, None)) == range(10)
def chunks(iterable, size=100):
    """Yields chunks of the given size as tuples"""
    it = iter(iterable)
    chunk = tuple(itertools.islice(it, size))
    while chunk:
        yield chunk
        chunk = tuple(itertools.islice(it, size))
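# Usage sketch (illustrative, not from the original source): the generator
# above turns any iterable into fixed-size tuples, with a shorter final chunk.
for chunk in chunks((n * n for n in range(7)), size=3):
    print(chunk)
# -> (0, 1, 4)  (9, 16, 25)  (36,)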
def generate_paths_toplevel(params, seed_iter): # Do actual generation on slave processes/computers G.pool.apply(set_policy_var_values_flat, G.agent.policy.var_values_flat()) G.pool.apply(set_vf_var_values_flat, G.agent.vf.var_values_flat()) G.pool.apply(clear_paths,None) if params['paths_per_batch'] != 0: # x0_nd = iss.get_n(params['paths_per_batch']) li_seed = list(itertools.islice(seed_iter,params['paths_per_batch'])) n_paths = params['paths_per_batch'] path_lengths = concatenate(G.pool.scatter(generate_and_store_paths, li_seed)) n_timesteps = sum(path_lengths) elif params['timesteps_per_batch'] != 0: n_timesteps = 0 n_paths = 0 while n_timesteps < params['timesteps_per_batch']: li_seed = list(itertools.islice(seed_iter,params['path_chunk_size'])) path_lengths = concatenate(G.pool.scatter(generate_and_store_paths, li_seed)) n_paths += len(path_lengths) n_timesteps += sum(path_lengths) else: raise NotImplementedError print "got %i paths with total num timesteps %i"%(n_paths,n_timesteps)
def computeConvexHull(schema, schema_graph, values_dict, curve_path): '''Computes the convex hull by starting with an initial hull, and then expanding if there are some extrusions to consider.''' initial_hull = computeInnerHull(schema, curve_path) final_hull = [] if len([e for e in schema["Edges"].keys() if schema["Edges"][e]["Type"] == "CircularArc"]) == 0: for hull in initial_hull: final_hull.append([(schema["Vertices"][str(h)]["Position"]["X"], schema["Vertices"][str(h)]["Position"]["Y"]) for h in hull]) return final_hull else: '''Only goes through this if there exists a circular arc.''' for hull in initial_hull: new_hull = [(schema["Vertices"][str(h)]["Position"]["X"], schema["Vertices"][str(h)]["Position"]["Y"]) for h in hull] points = hull+[hull[0]] #let's not talk about this hack... for a,b in izip(points, islice(points, 1, None)): path = [p for p in nx.shortest_path(schema_graph, int(a), int(b))] arcs_on_path = [arc for arc in [edgeLookup((x,y), schema) for x,y in izip(path, islice(path, 1, None))] if schema["Edges"][arc]["Type"] == "CircularArc"] extrusions = [x for x in arcs_on_path #Dirty trick to check if an arc is an extrusion from the hull. if path.index(int(values_dict[x]["start_id"])) > path.index(int(values_dict[x]["finish_id"]))] for x in extrusions: new_hull.extend(values_dict[x]["box_points"]) final_hull.append(grahams_hull(new_hull)) return final_hull
def subsample(tree_file, burnin, final_file_len): numTrees = file_len(tree_file) - burnin - 1 counter = numTrees/final_file_len stop = final_file_len*counter + 1 + burnin if min(file_len(tree_file), numTrees, counter, stop) > 0: if numTrees > final_file_len*counter + 1: #check dendropy version version=dendropy.__version__.split(".")[0] if version == '4': #open tree file, create output file name, initiate dendropy tree list with open(tree_file) as trees: tree_list = dendropy.TreeList() #iterate through lines in file, split out newick string, transform to tree object, add to tree list for i in itertools.islice(trees, burnin, stop, counter): newTree = (i.split()[4]) tree = dendropy.Tree.get(data=newTree, schema='newick') tree_list.append(tree) #print(len(tree_list)) elif version == '3': with open(t) as trees: tOut = tName + '.t' tree_list = dendropy.TreeList() for i in itertools.islice(trees, burnin, stop, counter): newTree = (i.split()[4]) tree = dendropy.Tree.get_from_string(newTree, schema='newick') tree_list.append(tree) #print(len(tree_list)) #if file isn't long enough. elif numTrees <= final_file_len*counter + 1: print t+" is too short. A .t file has NOT been created." elif min(file_len(tree_file), numTrees, counter, stop) <= 0: print t+" is too short" return tree_list
def mysql_multitable(db_eng, dict_iter, table_prefix, table_column, table_column_transform, batch_size=1000, template_table=None ): ''' Takes an iterator and outputs into multiple tables based on data from the iterator :param db_eng: SQLAlchemy db engine :param dict_iter: an iterator of dicts :param table_prefix: for the naming of the tables :param table_column: column to use for deciding which table to insert into :param table_column_transform: function to generate table name suffix using 'table_column' value :param batch_size: once a target table is identified, assume the next n rows go here ''' batch = list(itertools.islice(dict_iter, 0, batch_size)) batch_num = 0 while len(batch) > 0: df_batch = pd.DataFrame(batch) df_batch["batch_num"] = batch_num first_line = dict(df_batch.ix[0]) table_name = table_prefix + table_column_transform(first_line[table_column]) try: if template_table: db_eng.execute("CREATE TABLE IF NOT EXISTS {} LIKE {}".format(table_name, template_table)) df_batch.to_sql(table_name, db_eng, if_exists="append", index=False) except Exception as e: exc_info = sys.exc_info() print "Unexpected error:", exc_info[0], exc_info[1], exc_info[2] else: print "Successful insert of {} rows!".format(len(df_batch)) batch = list(itertools.islice(dict_iter, 0, batch_size)) batch_num += 1
def debug_WhosTurnGenerator(): # test with player 1 starting player_one_starts = True player_one_expected_output = [1, 2, 1, 2, 1, 2] player_one_first_6 = itertools.islice( tic_tac_toe.WhosTurnGenerator(player_one_starts), 0, 6) player_one_output = [] for item in player_one_first_6: player_one_output.append(item) # test with player 2 starting player_two_starts = False player_two_expected_output = [2, 1, 2, 1, 2, 1] player_two_first_6 = itertools.islice( tic_tac_toe.WhosTurnGenerator(player_two_starts), 0, 6) player_two_output = [] for item in player_two_first_6: player_two_output.append(item) if player_one_output != player_one_expected_output: print("WhosTurnGenerator: player_one_output != " "player_one_expected_output \n \ player_one_output: {0} \n \ player_one_expected_output {1}" .format(player_one_output, player_one_expected_output)) if player_two_output != player_two_expected_output: print("WhosTurnGenerator: player_two_output != " "player_two_expected_output \n \ player_two_output: {0} \n \ player_two_expected_output {1}" .format(player_two_output, player_two_expected_output))
def read(self, size=None): if self._start_time is None: self._start_time = int(time.time()) elapsed_time = int(time.time()) - self._start_time bytes_remaining = self._total_size - self._bytes_read if bytes_remaining == 0: if elapsed_time < self._total_seconds: sleep_time = self._total_seconds - elapsed_time self._log.info("sleeping {0}".format(sleep_time)) time.sleep(sleep_time) return "" sequences_read = self._bytes_read / self._sequence_size sequence_time = sequences_read * self._seconds_per_sequence if elapsed_time < sequence_time: sleep_time = sequence_time - elapsed_time self._log.info("sleeping {0}".format(sleep_time)) time.sleep(sleep_time) bytes_remaining = bytes_remaining % self._sequence_size if bytes_remaining == 0: bytes_remaining = self._sequence_size if size is None or size >= bytes_remaining: self._bytes_read += bytes_remaining data = "".join(islice(self._source, bytes_remaining)) return data self._bytes_read += size data = "".join(islice(self._source, size)) return data
def takeBatchOnArbitraryIterable(iterable, size):
    import itertools
    it = iter(iterable)
    item = list(itertools.islice(it, size))
    while item:
        yield item
        item = list(itertools.islice(it, size))
def paginate(iterable, page_size):
    i1, i2 = itertools.tee(iterable)
    while True:
        iterable, page = (itertools.islice(i1, page_size, None),
                          list(itertools.islice(i2, page_size)))
        if len(page) == 0:
            break
        yield page
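# Usage sketch (illustrative): paginate consumes the iterable lazily and
# yields lists of at most page_size items.
for page in paginate(iter(range(7)), 3):
    print(page)
# -> [0, 1, 2]  [3, 4, 5]  [6]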
def normalize(self): ''' Translates data into dictionary The schedstat file is a table - each record according to kernel version so we store the data in simple lists as we do not currently know the field names. The file consists of 3 records for every cpu - this class returns a dictionary keyed on cpuN where N varies from 0 upwards ''' LOGGER.debug("Normalize") lines = self.lines ret = {} rec = None for line in lines: vals = line.split() key1 = vals[0] if key1 == 'version': ret[key1] = vals[1:] elif key1 == 'timestamp': ret[key1] = vals[1:] elif self.RE1.match(key1): rec = key1 ret[key1] = [int(v) for v in islice(vals, 1, None)] elif rec: if self.RE2.match(key1): key2 = '%s-%s' % (rec, key1) ret[key2] = [vals[1]] ret[key2].extend([int(v) for v in islice(vals, 2, None)]) return ret
def chunks(iterable, chunk_size):
    """Chunks data into chunk with size<=chunk_size."""
    iterator = iter(iterable)
    chunk = list(itertools.islice(iterator, 0, chunk_size))
    while chunk:
        yield chunk
        chunk = list(itertools.islice(iterator, 0, chunk_size))
def append_barcode_paired(t): barcode, excludebarcode, outdir, inputfile = t bs = barcode.seq trim = len(bs) fake_qual = len(bs) * "#" outfastq = op.join(outdir, "{0}.{1}.fastq".format(barcode.id, barcode.seq)) r1, r2 = inputfile p1fp, p2fp = FastqPairedIterator(r1, r2) fw = open(outfastq, "w") while True: a = list(islice(p1fp, 4)) if not a: break title, seq, plus, qual = a seq = seq.strip() if not is_barcode_sample(seq, barcode, excludebarcode, trim): continue fw.writelines(a) title, seq, plus, qual = list(islice(p2fp, 4)) title, seq, qual = title.strip(), seq.strip(), qual.strip() # append barcode seq = bs + seq qual = fake_qual + qual print >> fw, "{0}\n{1}\n+\n{2}".format(title, seq, qual) fw.close()
def align_vertices(mesh, distr): """Distribute vertices regularly or align them. Arguments: @mesh (Mesh): the edited mesh datablock @distr (Bool): True, when to perform align & distribute """ vsel = get_selected_vertices(mesh) if len(vsel) < 3: raise Exception("need 3 vertices at least") vsel = XYZvertexsort(vsel) point = vsel[0].co vect = (vsel[-1].co - point) * (1.0 / (len(vsel) - 1)) if vect.length < EPSILON: return if distr == True: # align & distribute for mult, vert in enumerate(islice(vsel, 1, len(vsel) - 1)): v = vert.co finalv = (mult + 1) * vect + point v.x = finalv.x v.y = finalv.y v.z = finalv.z else: # align only for vert in islice(vsel, 1, len(vsel) - 1): v = vert.co finalv = project_point_vect(v, point, vect) v.x = finalv.x v.y = finalv.y v.z = finalv.z mesh.update()
def _parse_track(self, num, disc): """Return a Track object that contains a single track element from the parsed CUE text data. This method implements the 'track' scanning steps of the parser. Parameters: num : the track index of the track to parse. The first track starts at 0. """ # splice track data if num+1 < len(self._track_lines): data = itr.islice(self._cue, self._track_lines[num], self._track_lines[num+1]) else: data = itr.islice(self._cue, self._track_lines[num], None) # lookup the previous file name file_name = self._active_file(num) # <-- This is the main track parsing step ---> trk = mt_disc.Track(num+1) # Every CUE file has list of FILE, TRACK, and INDEX commands. The FILE # commands specify the active FILE for the following INDEX commands. The # TRACK indicate the logical beginning of a new TRACK info list with TITLE # and PERFORMER tags. cue_data = map( self._tinfo_search.match, data ) # raise error if unkown match is found if filter( lambda (key,match): not match, cue_data): raise ParseError, "Unmatched pattern in stream: '%s'" % txt
def grouper(iterable, n):
    "grouper('ABCDEFG', 3) --> ['A', 'B', 'C'] ['D', 'E', 'F'] ['G']"
    i = iter(iterable)
    g = list(islice(i, 0, n))
    while g:
        yield g
        g = list(islice(i, 0, n))
def entry(self, i):
    """Return the ith entry (starting numbering at 1), or raise an
    OutOfBoundsException if i is outside the acceptable range."""
    if i < 1:
        raise OutOfBoundsException(
            "Index must be greater than 0: " + str(i))
    entries = None
    # If I am using an index, I can check to make sure that i is
    # in the appropriate range. Then find the offset of the entry
    # and skip to that point in the file.
    if self.index is not None:
        if i > len(self):
            raise OutOfBoundsException("No entry with number " + str(i))
        offset = self.index[i - 1]
        entries = islice(self.entries(offset), 0, 1)
    # Otherwise I need to just skip over i - 1 entries
    else:
        entries = islice(self.entries(), i - 1, i)
    entries = list(entries)
    if len(entries) == 0:
        raise OutOfBoundsException("No entry with number " + str(i))
    return list(entries)[0]
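# Minimal standalone sketch of the same islice pattern used above: skipping
# directly to the i-th item of an iterator without materialising the rest.
# The helper name is illustrative only, not part of the original class.
from itertools import islice

def nth_entry(iterable, i):
    """Return the i-th item (1-based) or raise IndexError if the iterator is too short."""
    entries = list(islice(iterable, i - 1, i))
    if not entries:
        raise IndexError("no entry with number %d" % i)
    return entries[0]

assert nth_entry(iter("abcdef"), 3) == 'c'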
def execute_slice(self, contexts): subject = execute(self.subject, contexts) try: return subject[self.slice] except TypeError: # subject could be an iterable but not a sequence, eg. a generator # XXX this consumes the iterable. This may be unexpected if a # generator is used more than once. start = self.slice.start stop = self.slice.stop step = self.slice.step if start is None: start = 0 if step is None: step = 1 if step == 0: raise ValueError('Step can not be zero for slicing.') if start >= 0 and (stop is None or stop >= 0) and step > 0: return islice(subject, start, stop, step) elif start >= 0 and (stop is None or stop >= 0): # step < 0 => step > 0 step = -step return reversed(list(islice(subject, start, stop, step))) else: return list(subject)[self.slice]
def generate_bytes(bitstream,framerate): bitmasks = [0x1,0x2,0x4,0x8,0x10,0x20,0x40,0x80] # Compute the number of audio frames used to encode a single data bit frames_per_bit = int(round(float(framerate)*8/BASE_FREQ)) # Queue of sampled sign bits sample = deque(maxlen=frames_per_bit) # Fill the sample buffer with an initial set of data sample.extend(islice(bitstream,frames_per_bit-1)) sign_changes = sum(sample) # Look for the start bit for val in bitstream: if val: sign_changes += 1 if sample.popleft(): sign_changes -= 1 sample.append(val) # If a start bit detected, sample the next 8 data bits if sign_changes <= 9: byteval = 0 for mask in bitmasks: if sum(islice(bitstream,frames_per_bit)) >= 12: byteval |= mask yield byteval # Skip the final two stop bits and refill the sample buffer sample.extend(islice(bitstream,2*frames_per_bit,3*frames_per_bit-1)) sign_changes = sum(sample)
def messages(count, size):
    """Generator for count messages of the provided size"""
    import string
    # Make sure we have at least 'size' letters
    letters = islice(cycle(chain(string.lowercase, string.uppercase)), size)
    return islice(cycle("".join(l) for l in permutations(letters, size)), count)
def basic(topic="topic", channel="channel", count=1e6, size=10, gevent=False, max_in_flight=2500, profile=False): """Basic benchmark""" if gevent: from gevent import monkey monkey.patch_all() # Check the types of the arguments count = int(count) size = int(size) max_in_flight = int(max_in_flight) from nsq.http import nsqd from nsq.reader import Reader print "Publishing messages..." for batch in grouper(messages(count, size), 1000): nsqd.Client("http://localhost:4151").mpub(topic, batch) print "Consuming messages" client = Reader(topic, channel, nsqd_tcp_addresses=["localhost:4150"], max_in_flight=max_in_flight) with closing(client): start = -time.time() if profile: with profiler(): for message in islice(client, count): message.fin() else: for message in islice(client, count): message.fin() start += time.time() print "Finished %i messages in %fs (%5.2f messages / second)" % (count, start, count / start)
def demo():
    from en.parser.nltk_lite.corpora import ppattach
    from itertools import islice
    from pprint import pprint

    pprint(list(islice(ppattach.raw('training'), 0, 5)))
    pprint(list(islice(ppattach.dictionary('training'), 0, 5)))
def get_csv_export_info(self, preview_data): """Shows csv export preview dialog and returns csv_info csv_info is a tuple of dialect, has_header, digest_types Parameters ---------- preview_data: Iterable of iterables \tContains csv export data row-wise """ preview_rows = 100 preview_cols = 100 export_preview = list(list(islice(col, None, preview_cols)) for col in islice(preview_data, None, preview_rows)) filterdlg = CsvExportDialog(self.main_window, data=export_preview) if filterdlg.ShowModal() == wx.ID_OK: dialect, has_header = filterdlg.csvwidgets.get_dialect() digest_types = [types.StringType] else: filterdlg.Destroy() return filterdlg.Destroy() return dialect, has_header, digest_types
def listified_fn(*input_list): input_dict = OrderedDict() input_it = iter(input_list) input_dict.update(equizip(sequences.keys(), it.islice(input_it, len(sequences)))) for name, info in outputs_info.items(): if info is None: continue # no inputs elif isinstance(info, (dict, OrderedDict)): ntaps = len(info.get("taps", [-1])) else: # assume some kind of tensor variable or numpy array ntaps = 1 taps = [next(input_it) for _ in range(ntaps)] input_dict[name] = taps if ntaps > 1 else taps[0] input_dict.update(equizip(non_sequences.keys(), it.islice(input_it, len(non_sequences)))) # input_list should be exactly empty here try: next(input_it) except StopIteration: pass else: assert False output_dict = fn(**input_dict) output_list = [output_dict[output_name].copy(name=output_name) for output_name in outputs_info.keys()] return output_list
# 2019/12/08
import itertools as it


class Primes:
    def __init__(self):
        self.primes = it.count(2)

    def __iter__(self):
        return self

    def __next__(self):
        p = next(self.primes)
        self.primes = filter(lambda x: x % p != 0, self.primes)
        return p


num = 10_001
res = list(it.islice(Primes(), num))
print(res[-1])
def parallel_ring(objects, generator_func, mutable=False): r"""This function loops in a ring around a set of objects, yielding the results of generator_func and passing from one processor to another to avoid IO or expensive computation. This function is designed to operate in sequence on a set of objects, where the creation of those objects might be expensive. For instance, this could be a set of particles that are costly to read from disk. Processor N will run generator_func on an object, and the results of that will both be yielded and passed to processor N-1. If the length of the objects is not equal to the number of processors, then the final processor in the top communicator will re-generate the data as needed. In all likelihood, this function will only be useful internally to yt. Parameters ---------- objects : Iterable The list of objects to operate on. generator_func : callable This function will be called on each object, and the results yielded. It must return a single NumPy array; for multiple values, it needs to have a custom dtype. mutable : bool Should the arrays be considered mutable? Currently, this will only work if the number of processors equals the number of objects. dynamic : bool This governs whether or not dynamic load balancing will be enabled. This requires one dedicated processor; if this is enabled with a set of 128 processors available, only 127 will be available to iterate over objects as one will be load balancing the rest. Examples -------- Here is a simple example of a ring loop around a set of integers, with a custom dtype. >>> dt = np.dtype([('x', 'float64'), ('y', 'float64'), ('z', 'float64')]) >>> def gfunc(o): ... np.random.seed(o) ... rv = np.empty(1000, dtype=dt) ... rv['x'] = np.random.random(1000) ... rv['y'] = np.random.random(1000) ... rv['z'] = np.random.random(1000) ... return rv ... >>> obj = range(8) >>> for obj, arr in parallel_ring(obj, gfunc): ... print(arr['x'].sum(), arr['y'].sum(), arr['z'].sum()) ... """ if mutable: raise NotImplementedError my_comm = communication_system.communicators[-1] my_size = my_comm.size my_rank = my_comm.rank # This will also be the first object we access if not parallel_capable and not mutable: for obj in objects: yield obj, generator_func(obj) return generate_endpoints = len(objects) != my_size # gback False: send the object backwards # gforw False: receive an object from forwards if len(objects) == my_size: generate_endpoints = False gback = False gforw = False else: # In this case, the first processor (my_rank == 0) will generate. 
generate_endpoints = True gback = my_rank == 0 gforw = my_rank == my_size - 1 if generate_endpoints and mutable: raise NotImplementedError # Now we need to do pairwise sends source = (my_rank + 1) % my_size dest = (my_rank - 1) % my_size oiter = itertools.islice(itertools.cycle(objects), my_rank, my_rank + len(objects)) idata = None isize = np.zeros((1, ), dtype="int64") osize = np.zeros((1, ), dtype="int64") for obj in oiter: if idata is None or gforw: idata = generator_func(obj) idtype = odtype = idata.dtype if get_mpi_type(idtype) is None: idtype = "c" yield obj, idata # We first send to the previous processor tags = [] if not gforw: tags.append(my_comm.mpi_nonblocking_recv(isize, source)) if not gback: osize[0] = idata.size tags.append(my_comm.mpi_nonblocking_send(osize, dest)) my_comm.mpi_Request_Waitall(tags) odata = idata tags = [] if not gforw: idata = np.empty(isize[0], dtype=odtype) tags.append( my_comm.mpi_nonblocking_recv(idata.view(idtype), source, dtype=idtype)) if not gback: tags.append( my_comm.mpi_nonblocking_send(odata.view(idtype), dest, dtype=idtype)) my_comm.mpi_Request_Waitall(tags) del odata
def parallel_objects(objects, njobs=0, storage=None, barrier=True, dynamic=False): r"""This function dispatches components of an iterable to different processors. The parallel_objects function accepts an iterable, *objects*, and based on the number of jobs requested and number of available processors, decides how to dispatch individual objects to processors or sets of processors. This can implicitly include multi-level parallelism, such that the processor groups assigned each object can be composed of several or even hundreds of processors. *storage* is also available, for collation of results at the end of the iteration loop. Calls to this function can be nested. This should not be used to iterate over datasets -- :class:`~yt.data_objects.time_series.DatasetSeries` provides a much nicer interface for that. Parameters ---------- objects : Iterable The list of objects to dispatch to different processors. njobs : int How many jobs to spawn. By default, one job will be dispatched for each available processor. storage : dict This is a dictionary, which will be filled with results during the course of the iteration. The keys will be the dataset indices and the values will be whatever is assigned to the *result* attribute on the storage during iteration. barrier : bool Should a barier be placed at the end of iteration? dynamic : bool This governs whether or not dynamic load balancing will be enabled. This requires one dedicated processor; if this is enabled with a set of 128 processors available, only 127 will be available to iterate over objects as one will be load balancing the rest. Examples -------- Here is a simple example of iterating over a set of centers and making slice plots centered at each. >>> for c in parallel_objects(centers): ... SlicePlot(ds, "x", "Density", center = c).save() ... Here's an example of calculating the angular momentum vector of a set of spheres, but with a set of four jobs of multiple processors each. Note that we also store the results. >>> storage = {} >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage): ... sp = ds.sphere(c, (100, "kpc")) ... sto.result = sp.quantities["AngularMomentumVector"]() ... >>> for sphere_id, L in sorted(storage.items()): ... print(centers[sphere_id], L) ... """ if dynamic: from .task_queue import dynamic_parallel_objects for my_obj in dynamic_parallel_objects(objects, njobs=njobs, storage=storage): yield my_obj return if not parallel_capable: njobs = 1 my_communicator = communication_system.communicators[-1] my_size = my_communicator.size if njobs <= 0: njobs = my_size if njobs > my_size: mylog.error( "You have asked for %s jobs, but you only have %s processors.", njobs, my_size, ) raise RuntimeError my_rank = my_communicator.rank all_new_comms = np.array_split(np.arange(my_size), njobs) for i, comm_set in enumerate(all_new_comms): if my_rank in comm_set: my_new_id = i break if parallel_capable: communication_system.push_with_ids(all_new_comms[my_new_id].tolist()) to_share = {} # If our objects object is slice-aware, like time series data objects are, # this will prevent intermediate objects from being created. 
oiter = itertools.islice(enumerate(objects), my_new_id, None, njobs) for result_id, obj in oiter: if storage is not None: rstore = ResultsStorage() rstore.result_id = result_id yield rstore, obj to_share[rstore.result_id] = rstore.result else: yield obj if parallel_capable: communication_system.pop() if storage is not None: # Now we have to broadcast it new_storage = my_communicator.par_combine_object(to_share, datatype="dict", op="join") storage.update(new_storage) if barrier: my_communicator.barrier()
def previousAndNext(some_iterable):
    # http://stackoverflow.com/questions/1011938/python-previous-and-next-values-inside-a-loop
    prevs, items, nexts = tee(some_iterable, 3)
    prevs = chain([None], prevs)
    nexts = chain(islice(nexts, 1, None), [None])
    return izip(prevs, items, nexts)
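# Usage sketch (illustrative; assumes Python 2, where izip is available):
# each element is paired with its previous and next neighbours, padded with None.
window = list(previousAndNext([1, 2, 3]))
assert window == [(None, 1, 2), (1, 2, 3), (2, 3, None)]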
print(wn.lemma('dog.n.01.dog').synset()) print("*"*111) print(sorted(wn.langs())) print(wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn')) print(wn.synset('spy.n.01').lemma_names('jpn')) print(wn.synset('dog.n.01').lemma_names('ita')) print("*"*111) dog = wn.synset('dog.n.01') print(dog.hypernyms()) print(dog.hyponyms()) print(dog.member_holonyms()) print(dog.root_hypernyms()) print(wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01'))) print("*"*111) good = wn.synset('good.a.01') # print(good.antonyms()) print(good.lemmas()[0].antonyms()) print("*"*111) dog = wn.synset('dog.n.01') cat = wn.synset('cat.n.01') hit = wn.synset('hit.v.01') slap = wn.synset('slap.v.01') print("*"*111) #Walk through the noun synsets looking at their hypernyms: from itertools import islice for synset in islice(wn.all_synsets('n'), 5): print(synset, synset.hypernyms())
def _love_you_forever(self, start=0, status=1, indent=0) -> Union[int, Tuple[int, list], list]: jump = 0 index = start # 为什么需要这行语句呢?因为程序可能他妈根本就不经过下面这个for循环! result = [] if status == 2: # 对于内部不解析的块,尤其是`block:code`,需要记录它们的空白行; more = 0 for index, line in enumerate(self.lines[start:], start): # 掠过行 if jump: jump -= 1 continue # 忽略已经被解析过的行。 line = line.rstrip() if not line: if status == 2: more += 1 # 记录内部不解析的块的空白行。 continue # 忽略空白行 # 探寻缩进等级 pos = 0 # 当前缩进空格数 back = False # 标记:是否要打回去(往回缩进) while pos < len(line) and line[pos] == ' ': pos += 1 if status == 2: # 对于内部不解析的块: pos_ = indent * 4 if pos < pos_: # 如果实际缩进小于目标缩进,那么打回去; back = True else: # 否则,超过目标缩进的空格原样保留。 pos = pos_ else: div, mod = divmod(pos, 4) if div < indent: # 实际缩进小于目标缩进,打回去; back = True elif mod != 0 or div > indent: # 实际缩进大于目标缩进,*忽略该行*。 continue # 错误缩进的处理 if back: index -= 1 if status == 0: return index # 注释不需要CONTENT(RESULT), else: return index, result # 其它则都需要。 # 0: 注释 if status == 0: continue # 直接忽略注释。 # 以下是非注释 if pos: line = line[pos:] prefix = line[0] # 获取行首字符,优化运算。 # 1: 常规 if status == 1: # List if prefix in '.0123456789?~x-:+v*': Axx = RE_QUICK_LIST.fullmatch(line) if Axx: if Axx.group(1): Qtype, Qstatus = 'u', 3 elif Axx.group(2): Qtype, Qstatus = 'o', 4 elif Axx.group(3): Qtype, Qstatus = 't', 5 Qindex, Qcontent = index, [] while Qindex < self.lines_length: Qindex, Qresult = self._love_you_forever(Qindex, Qstatus, indent) if Qresult: Qcontent.append(Qresult) else: break result.append([3, 'lst', {'typ': Qtype, 'sta': 1 if prefix == '?' else int(Axx.group(2))} if Qtype == 'o' else {'typ': Qtype}, Qcontent]) jump = Qindex - index continue # Header if prefix == '=': Axx = RE_QUICK_HEAD.fullmatch(line) if Axx: Qtype, Qheader = Axx.groups() Qtype = len(Qtype) # 其正确长度由正则表达式保护 result.append([3, 'had', {'lev': Qtype}, Qheader]) # 非顶层header不加入headers if not indent: self.headers.append((Qtype, Qheader)) continue # Separator & page break elif prefix in "%-" and 3 <= len(line) == line.count(prefix): # % 100101 # - 101101 result.append([3, ['pgb', 'sep'][(ord(prefix)>>3)&1], None, None]) continue # Anything else: Axx = RE_QUICK_ALLS.fullmatch(line) if Axx: Quick = Axx.group(2) # Config if prefix == '&' and indent == 0 and Quick: Axx = RE_ALIAS.fullmatch(Quick) if Axx: Qmodule, Qalias = Axx.groups() if Qmodule in self.ext_modules or Qmodule == 'RAINLotus': if Qmodule != 'RAINLotus': if Qalias and (Qmodule not in self.imports): # 如果一个模块设置了多个别名,那么应该只使用第一次设置的那个。 self.imports[Qalias] = Qmodule self.imported.add(Qmodule) Qindex, Qresult = self._love_you_forever(index+1, 7, indent+1) self.config[Qmodule].extend(Qresult) jump = Qindex - index # General elif prefix == '/' and Quick: Qidex = RE_IDEX.match(Quick) if Qidex: Qalias, Qmethod = Qidex.groups() if Qalias: # 过滤掉所有未导入的模块 if Qalias in self.imports: Qmodule = self.imports[Qalias] elif Qalias in self.imported: Qmodule = Qalias else: continue try: if not self.ext_modules[Qmodule][0][0](Qmethod): continue except Exception: continue try: Qstatus = 1 if self.ext_modules[Qmodule][1][0](Qmethod) else 2 except Exception: Qstatus = 2 elif Qmethod in {'image', 'audio', 'video'}: Qmodule = None Qstatus = 1 else: continue Qargx = RE_ARGX.match(Quick, Qidex.end()) if Qmodule: Qargs = ARGX2ARGS(Qargx.group(0)) else: Qargs = ARGX2ARGS(Qargx.group(0), { 'image': lambda x: x in {'src', 'alt'}, 'audio': lambda x: x in {'src', 'autoplay', 'loop', 'muted', 'preload'}, 'video': lambda x: x in {'src', 'autoplay', 'loop', 'muted', 'preload'} }[Qmethod]) Qsufx = RE_SUFX.fullmatch(Quick, Qargx.end()) Qtext = 
Qsufx.group(1) if Qsufx else None Qindex, Qresult = self._love_you_forever(index+1, Qstatus, indent+1) if Qalias: result.append([5, (Qmodule, Qmethod), Qargs, self._combin(Qtext, Qresult, Qstatus==2)]) else: result.append([3, { 'image': 'img', 'audio': 'aud', 'video': 'vid' }[Qmethod], Qargs, self._combin(Qtext, Qresult)]) jump = Qindex - index # Note elif prefix == '*' and Quick: Quick = Quick.lower() if Quick in ALLOWED_ARGS_NOTE: Qindex, Qresult = self._love_you_forever(index+1, 1, indent+1) result.append([3, 'not', {'typ': Quick}, Qresult]) jump = Qindex - index # Quote elif prefix == '"': Qindex, Qresult = self._love_you_forever(index+1, 1, indent+1) if Quick and Quick[:2] in {'--', '——'}: result.append([3, 'quo', {'aut': self._mesilf(Quick[2:].strip(), True)}, Qresult]) else: result.append([3, 'quo', {'aut': None}, self._combin(Quick, Qresult)]) jump = Qindex - index # Definition-list elif prefix == ':' and Quick: Qindex, Qcontent = index, [] while Qindex < self.lines_length: Qindex, Qresult = self._love_you_forever(Qindex, 6, indent) if Qresult: Qcontent.append(Qresult) else: break result.append([3, 'lst', {'typ': 'd'}, Qcontent]) jump = Qindex - index # Table elif prefix == '|': Qmode, Qheight, Qrotate = RE_TABLE.fullmatch(f' {Quick}').groups() if Quick else (None, None, None) # Qmode: 表格模式 # Qheight: 表格头部高度 # Qrotate: 是否旋转表格 Qindex, Qtable = self._love_you_forever(index+1, 2, indent+1) Qtable = filter(bool, Qtable) ### 解析表格 ### mod = Qmode.lower() if Qmode else 'quick' hei = int(Qheight) if Qheight else 1 fai = False # 统一数据格式 if mod == 'quick': fresh = map(lambda x: x[1:], csv.reader( Qtable, delimiter='|', escapechar='\\', quoting=csv.QUOTE_NONE )) elif mod == 'csv': fresh = csv.reader(Qtable) else: try: fresh = json.loads( ''.join(Qtable), parse_int=str, parse_float=str, parse_constant=str ) except json.JSONDecodeError: fai = True if isinstance(fresh, dict): try: head, align, body = fresh['head'], fresh['align'], fresh['body'] except KeyError: fai = True else: if any(map( lambda x: not isinstance(x, list) or any(map( lambda y: any(map( lambda z: not isinstance(z, str), y)), x)), (head, [align], body) )): fai = True else: hei = len(head) fresh = [] fresh.extend(head) fresh.append(align) fresh.extend(body) elif isinstance(fresh, list): if any(map( lambda x: not isinstance(x, list) or any(map( lambda y: any(map( lambda z: not isinstance(z, str), y)), x)), fresh )): fai = True else: fai = True if fai: jump = Qindex - index continue if mod != 'json': fresh = tuple(map(lambda x: tuple(map(lambda y: y.strip(), x)), fresh)) # 查找表格宽度及安全性保护及对齐控制文本 wid = 0 ava = [] ali = [] for i, row in enumerate(fresh): # 对齐控制行 if i == hei: ava.append(False) if not row: # for json ali = repeat('=') continue for j, sign in enumerate(row, 1): if sign in {'<', '=', '>'}: ali.append(sign) if j == len(row): if 0 < wid != len(row): fai = True break else: wid = len(row) elif j == len(row): if sign in {'<<<', '===', '>>>'}: ali = chain(ali, repeat(sign[0])) elif not sign: # for quick ali = repeat('=') else: fai = True break else: fai = True break if fai: break continue # 空行 if not row: ava.append(False) continue # 常规行 if row[-1] in {'<<<', '===', '>>>'}: ava.append(len(row) != 1) else: ava.append(True) if 0 < wid != len(row): fai = True break else: wid = len(row) if fai or not ali or wid == 0: jump = Qindex - index continue # 逐单元格计算 table = [] for row in compress(fresh, ava): span = 1 cache = None rowing = [] for i, cell in enumerate(row, 1): if cell in {'>', '>>>'}: if not cache: fai = True break elif cell == 
'>': span += 1 continue elif i == len(row): span += wid - i + 1 break if cache: rowing.append([span, cache]) cache = self._mesilf(cell, True) span = 1 else: cache = self._mesilf(cell, True) if fai: break if cache: rowing.append([span, cache]) table.append(rowing) if fai: jump = Qindex - index continue ### 这才是最终要传递的 ### result.append([ 3, 'tab', { 'hei': hei, 'rot': bool(Qrotate), 'ali': list(islice(ali, wid)) }, table ]) jump = Qindex - index # Collapse elif prefix == '~': if Quick: try: Qopen, Qsummary = Quick.split(' ', 1) if Qopen.lower() != 'open': Qopen, Qsummary = False, self._mesilf(Quick, True) else: Qopen, Qsummary = True, self._mesilf(Qsummary, True) except ValueError: if Quick.lower() == 'open': Qopen, Qsummary = True, None else: Qopen, Qsummary = False, self._mesilf(Quick, True) else: Qopen, Qsummary = False, None # Qopen: 是否默认展开 # Qsummary: 摘要 Qindex, Qresult = self._love_you_forever(index+1, 1, indent+1) result.append([3, 'col', {'opn': Qopen, 'sum': Qsummary}, Qresult]) jump = Qindex - index # Dialog elif prefix == '@': Qargx = RE_ARGX.fullmatch(f' {Quick}') if Qargx: Qargs = ARGX2ARGS(Qargx.group(0), lambda x: x in ALLOWED_ARGS_DIALOG) else: Qargs = {} Qindex, Qresult = self._love_you_forever(index+1, 10, indent+1) result.append([3, 'dia', Qargs, Qresult]) jump = Qindex - index # Footnote elif prefix == '>' and Quick: Qindex, Qresult = self._love_you_forever(index+1, 1, indent+1) result.append([3, 'fnt', {'fnt': Quick}, Qresult]) jump = Qindex - index # Code elif prefix == '`': Qindex, Qresult = self._love_you_forever(index+1, 2, indent+1) result.append([3, 'cod', {'lan': Quick.lower() if Quick else 'plaintext'}, Qresult]) jump = Qindex - index # Raw & Diagram & Formula elif prefix in '!#$': # ! 100001 # # 100011 # $ 100100 prefix = ord(prefix) >> 1 & 3 Qindex, Qresult = self._love_you_forever(index+1, 2, indent+1) result.append([3, ['raw', 'dgr', 'fml'][prefix], None, [Quick]+Qresult if Quick else Qresult]) jump = Qindex - index # Comment elif prefix == ';': Qindex = self._love_you_forever(index+1, 0, indent+1) jump = Qindex - index continue # 345: 无序/有序/Todos 列表 elif 3 <= status <= 5: Axx = RE_QUICK_LIST.fullmatch(line) if Axx: *ovo, text = Axx.groups() Qstatus = tuple(map(bool, ovo)).index(True) + 3 if status == Qstatus: Qindex, Qresult = self._love_you_forever(index+1, 1, indent+1) Qcontent = self._combin(text, Qresult) return Qindex+1, (Qcontent if status != 5 else [Axx.group(3), Qcontent]) return index-1, None # 6: 定义列表 elif status == 6: if prefix == ':': Axx = RE_QUICK_ALLS.fullmatch(line) if Axx: Qdefinition = Axx.group(2) # 列表项标号后的文本 if Qdefinition: Qindex, Qresult = self._love_you_forever(index+1, 1, indent+1) return Qindex+1, [self._mesilf(Qdefinition, True), Qresult] return index-1, None # 7: 配置解析 elif status == 7: Axx = RE_SECT.fullmatch(line) if Axx: Qcommand, Qtext = Axx.groups() Qindex, Qresult = self._love_you_forever(index+1, 2, indent+1) result.append([Qcommand, self._combin(Qtext, Qresult, True)]) jump = Qindex - index continue # 10: Dialog elif status == 10: # 0: 自己的话 # 1: 对方的话 # 2: 对方的话 - 指定名字 # 3: 系统提示 # 批注:但实际上渲染出来,2和1是会合并的。 Axx = RE_DIALOG.fullmatch(line) if Axx: Qme, Qyou, Qname, Qsys, Qmessage = Axx.groups() if Qme: Qtype = 0 Qfeat = Qme[1] elif Qyou: Qname = Qname and Qname.strip().replace('@@', '@') Qtype = bool(Qname) + 1 Qfeat = Qyou[0] elif Qsys: Qtype = 3 Qfeat = None else: continue Qargs = {} if Qtype < 3 and Qfeat != '-': # $!~? 
Qargs['typ'] = ('hongbao', 'failed', 'voice', 'sending')[ord(Qfeat)&3] if Qfeat in {'~', '$'}: try: Qvalue = int(Qmessage) if Qfeat == '~' else int(float(Qmessage) * 100) except (TypeError, ValueError): continue else: if (Qtype == '~' and not 2 <= Qvalue <= 60) \ or (Qtype == '$' and not 0 <= Qvalue): continue Qargs['val'] = Qvalue Qindex, Qresult = self._love_you_forever(index+1, 1, indent+1) if Qfeat not in {'~', '$'}: Qresult = self._combin(Qmessage, Qresult) if not Qresult: continue result.append([Qtype, Qresult, Qargs] + ([Qname] if Qname else [])) jump = Qindex - index continue # 2: 不解析 if status == 2: result.extend(repeat('', more)) result.append(line) more = 0 # ?: 没有什么特色块 else: result.append(self._mesilf(line)) if status == 0: return index # *注释状态*调用,只需返回新索引 elif indent == 0: return result # 表明这是顶层调用,返回结果 else: return index, result # 表明这是内部调用,返回结果和新索引
#!/usr/bin/env python import sys import itertools import math if __name__ == "__main__": for (i, l) in enumerate([l.rstrip("\n") for l in sys.stdin.readlines()[1:]]): (k, c, s) = map(int, l.split(" ")) kk = range(k) if s * c >= k: kki = [ itertools.islice(itertools.cycle(kk), 0, math.ceil(k / c) * c) ] * c tiles = list( map(lambda x: list(filter(lambda x: x != None, x)), itertools.zip_longest(*kki))) solution = [ 1 + sum(map(lambda x: x[1] * (k**x[0]), enumerate(l))) for l in tiles ] print("Case #" + str(i + 1) + ": " + " ".join(map(str, solution))) else: print("Case #" + str(i + 1) + ": " + "IMPOSSIBLE")
def search_entities(self, query_s, limit=100, offset=0, raw_query=False, **filters): """ Solr edismax query parser syntax. :param query_s: a query string of search terms (e.g. - sales quarterly); Currently the search will perform an OR boolean search for all terms (split on whitespace), against a whitelist of search_fields. """ sources = filters.get('sources', []) default_entity_types, entity_types = self._get_types_from_sources(sources) try: params = self.__params if not raw_query: query_s = query_s.replace('{', '\\{').replace('}', '\\}').replace('(', '\\(').replace(')', '\\)').replace('[', '\\[').replace(']', '\\]') search_terms = [term for term in query_s.strip().split()] query_clauses = [] user_filters = [] source_type_filter = [] for term in search_terms: if ':' not in term: if ('sql' in sources or 'hive' in sources or 'impala' in sources): if '.' in term: parent, term = term.rsplit('.', 1) user_filters.append('parentPath:"/%s"' % parent.replace('.', '/')) query_clauses.append(self._get_boosted_term(term)) else: name, val = term.split(':') if val: if name == 'type': term = '%s:%s' % (name, val.upper().strip('*')) default_entity_types = entity_types # Make sure type value still makes sense for the source user_filters.append(term + '*') # Manual filter allowed e.g. type:VIE* ca filter_query = '*' if query_clauses: filter_query = 'OR'.join(['(%s)' % clause for clause in query_clauses]) user_filter_clause = 'AND '.join(['(%s)' % f for f in user_filters]) or '*' source_filter_clause = 'OR'.join(['(%s:%s)' % ('type', entity_type) for entity_type in default_entity_types]) if 's3' in sources: source_type_filter.append('sourceType:s3') elif 'sql' in sources or 'hive' in sources or 'impala' in sources: source_type_filter.append('sourceType:HIVE OR sourceType:IMPALA') filter_query = '%s AND (%s) AND (%s)' % (filter_query, user_filter_clause, source_filter_clause) if source_type_filter: filter_query += ' AND (%s)' % 'OR '.join(source_type_filter) source_ids = get_cluster_source_ids(self) if source_ids: filter_query = source_ids + '(' + filter_query + ')' else: filter_query = query_s params += ( ('query', filter_query), ('offset', offset), ('limit', NAVIGATOR.FETCH_SIZE_SEARCH.get()), ) LOG.info(params) response = self._root.get('entities', headers=self.__headers, params=params) response = list(islice(self._secure_results(response), limit)) # Apply Sentry perms return response except RestException, e: LOG.error('Failed to search for entities with search query: %s' % query_s) if e.code == 401: raise CatalogAuthException(_('Failed to authenticate.')) else: raise CatalogApiException(e)
def display(self, number=25):
    ids = (i.id for i in itertools.islice(self.invalid, number))
    cont = ', ...' if number < self.invalid_count else ''
    print(' %s%s' % (', '.join(ids), cont))
def find_variable_info(vname, scope, top=None):
    if top is None:
        top = len(scope.locals)
    candidates = (vinfo for vinfo in itl.islice(scope.locals, top)
                  if vinfo.name == vname)
    return next(candidates, None)
def assertPrefixEqual(self, expected, actual_iter):
    """Consumes len(expected) items from the given iter, and asserts that they match, in order.

    :API: public
    """
    self.assertEqual(expected, list(itertools.islice(actual_iter, len(expected))))
def search_entities_interactive(self, query_s=None, limit=100, offset=0, facetFields=None, facetPrefix=None, facetRanges=None, filterQueries=None, firstClassEntitiesOnly=None, sources=None): try: pagination = { 'offset': offset, 'limit': NAVIGATOR.FETCH_SIZE_SEARCH_INTERACTIVE.get(), } f = { "outputFormat" : { "type" : "dynamic" }, "name" : { "type" : "dynamic" }, "lastModified" : { "type" : "date" }, "sourceType" : { "type" : "dynamic" }, "parentPath" : { "type" : "dynamic" }, "lastAccessed" : { "type" : "date" }, "type" : { "type" : "dynamic" }, "sourceId" : { "type" : "dynamic" }, "partitionColNames" : { "type" : "dynamic" }, "serDeName" : { "type" : "dynamic" }, "created" : { "type" : "date" }, "fileSystemPath" : { "type" : "dynamic" }, "compressed" : { "type" : "bool" }, "clusteredByColNames" : { "type" : "dynamic" }, "originalName" : { "type" : "dynamic" }, "owner" : { "type" : "dynamic" }, "extractorRunId" : { "type" : "dynamic" }, "userEntity" : { "type" : "bool" }, "sortByColNames" : { "type" : "dynamic" }, "inputFormat" : { "type" : "dynamic" }, "serDeLibName" : { "type" : "dynamic" }, "originalDescription" : { "type" : "dynamic" }, "lastModifiedBy" : { "type" : "dynamic" } } auto_field_facets = ["tags", "type"] + f.keys() query_s = (query_s.strip() if query_s else '') + '*' last_query_term = [term for term in query_s.split()][-1] if last_query_term and last_query_term != '*': last_query_term = last_query_term.rstrip('*') (fname, fval) = last_query_term.split(':') if ':' in last_query_term else (last_query_term, '') auto_field_facets = [f for f in auto_field_facets if f.startswith(fname)] facetFields = facetFields or auto_field_facets[:5] entity_types = [] fq_type = [] if filterQueries is None: filterQueries = [] if sources: default_entity_types, entity_types = self._get_types_from_sources(sources) if 'sql' in sources or 'hive' in sources or 'impala' in sources: fq_type = default_entity_types filterQueries.append('sourceType:HIVE OR sourceType:IMPALA') elif 'hdfs' in sources: fq_type = entity_types elif 's3' in sources: fq_type = default_entity_types filterQueries.append('sourceType:s3') if query_s.strip().endswith('type:*'): # To list all available types fq_type = entity_types search_terms = [term for term in query_s.strip().split()] if query_s else [] query = [] for term in search_terms: if ':' not in term: query.append(self._get_boosted_term(term)) else: name, val = term.split(':') if val: # Allow to type non default types, e.g for SQL: type:FIEL* if name == 'type': # Make sure type value still makes sense for the source term = '%s:%s' % (name, val.upper()) fq_type = entity_types if name.lower() not in ['type', 'tags', 'owner', 'originalname', 'originaldescription', 'lastmodifiedby']: # User Defined Properties are prefixed with 'up_', i.e. 
"department:sales" -> "up_department:sales" query.append('up_' + term) else: filterQueries.append(term) filterQueries.append('deleted:false') body = {'query': ' '.join(query) or '*'} if fq_type: filterQueries += ['{!tag=type} %s' % ' OR '.join(['type:%s' % fq for fq in fq_type])] source_ids = get_cluster_source_ids(self) if source_ids: body['query'] = source_ids + '(' + body['query'] + ')' body['facetFields'] = facetFields or [] # Currently mandatory in API if facetPrefix: body['facetPrefix'] = facetPrefix if facetRanges: body['facetRanges'] = facetRanges if filterQueries: body['filterQueries'] = filterQueries if firstClassEntitiesOnly: body['firstClassEntitiesOnly'] = firstClassEntitiesOnly data = json.dumps(body) LOG.info(data) response = self._root.post('interactive/entities?limit=%(limit)s&offset=%(offset)s' % pagination, data=data, contenttype=_JSON_CONTENT_TYPE, clear_cookies=True) response['results'] = list(islice(self._secure_results(response['results']), limit)) # Apply Sentry perms return response except RestException, e: LOG.error('Failed to search for entities with search query: %s' % json.dumps(body)) if e.code == 401: raise CatalogAuthException(_('Failed to authenticate.')) else: raise CatalogApiException(e.message)
# Arguments infile = sys.argv[1] # input fasta file # Align using parasail # Read the first two sequences # Set the first sequence as the query # Set the second sequence as the target # Ignore other sequence in input file count = 0 query_name = "" query_seq = "" target_name = "" target_seq = "" with pysam.FastxFile(infile) as fa_in: for cnt, read in enumerate(islice(fa_in, None)): count += 1 if count == 1: # query query_seq = read.sequence query_name = read.name elif count == 2: # target target_seq = read.sequence target_name = read.name else: # stop break # set score gap_open, gap_extend = 8, 4 score_matrix = parasail.matrix_create("ACGT", 5, -5) # semi-global alignment
def _chunker(self, iterable, chunk_size):
    iterator = iter(iterable)
    while True:
        try:
            first = next(iterator)
        except StopIteration:
            return  # PEP 479: don't let StopIteration leak out of a generator
        yield itertools.chain([first], itertools.islice(iterator, chunk_size - 1))
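# Note (not from the original source): each chunk yielded by _chunker is itself
# a lazy iterator, so it must be fully consumed before the next chunk is
# requested; with chunk_size=3 over iter(range(7)) the chunks materialise as
# [0, 1, 2], [3, 4, 5], [6].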
def train_from_state( self, state: TrainingState, training_data: DataLoader, eval_data: DataLoader, metric_reporter: MetricReporter, train_config: PyTextConfig, ) -> Tuple[torch.nn.Module, Any]: """ Train and eval a model from a given training state will be modified. This function iterates epochs specified in config, and for each epoch do: 1. Train model using training data, aggregate and report training results 2. Adjust learning rate if scheduler is specified 3. Evaluate model using evaluation data 4. Calculate metrics based on evaluation results and select best model Args: training_state (TrainingState): contrains stateful information to be able to restore a training job train_iter (DataLoader): batch iterator of training data eval_iter (DataLoader): batch iterator of evaluation data model (Model): model to be trained metric_reporter (MetricReporter): compute metric based on training output and report results to console, file.. etc train_config (PyTextConfig): training config Returns: model, best_metric: the trained model together with the best metric """ training_data = self.set_up_training(state, training_data) model = state.model rank = state.rank trainable_params = sum(p.numel() for p in state.model.parameters() if p.requires_grad) print(f"Model :{model}") print(f"Num trainable parameters: {trainable_params}") while self.continue_training(state): state.epoch += 1 state.epochs_since_last_improvement += 1 lrs = learning_rates(state.optimizer) print(f"\nWorker {state.rank} starting epoch {state.epoch}") print(f"Learning rate(s): {', '.join(map(str, lrs))}") with timing.time("train epoch"): state.stage = Stage.TRAIN state.model.train() print(f"start training epoch {state.epoch}") epoch_data = training_data if self.config.num_batches_per_epoch: # We want to limit the number of batches in the epoch; # equivalent to epoch_data[:num_batches_per_epoch] for iterators. # In this case we set the training data iterator to cycle earlier # in the training process, so when it reaches the end it will # loop back to the beginning. epoch_data = itertools.islice( epoch_data, self.config.num_batches_per_epoch) self.run_epoch(state, epoch_data, metric_reporter) if not self.config.do_eval: continue with timing.time("eval epoch"): state.stage = Stage.EVAL model.eval() print(f"start evaluating epoch {state.epoch}") with torch.no_grad(): eval_metric = self.run_epoch(state, eval_data, metric_reporter) # Step the learning rate scheduler(s) assert eval_metric is not None state.scheduler.step_epoch( metrics=metric_reporter.get_model_select_metric(eval_metric), epoch=state.epoch, ) # Did we train a better model? 
better_model = metric_reporter.compare_metric( eval_metric, state.best_model_metric) if better_model: self.update_best_model(state, train_config, eval_metric) if better_model or train_config.save_all_checkpoints: self.save_checkpoint(state, train_config) if self.optimizer.finalize(): should_update_model = True eval_metric = None if self.config.do_eval: state.stage = Stage.EVAL model.eval() print("start evaluating finalized state") with torch.no_grad(): eval_metric = self.run_epoch(state, eval_data, metric_reporter) should_update_model = metric_reporter.compare_metric( eval_metric, state.best_model_metric) if should_update_model: self.update_best_model(state, train_config, eval_metric) if should_update_model or train_config.save_all_checkpoints: self.save_checkpoint(state, train_config) # Only bother loading the best model for master worker if (rank == 0 and state.best_model_state is not None and self.config.load_best_model_after_train): self.load_best_model(state) return state.model, state.best_model_metric
def get(self, start_idx: int, end_idx: int) -> Transition:
    transitions = list(itertools.islice(self._memory, start_idx, end_idx))
    return Transition(*zip(*transitions))
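# Minimal sketch (assumes self._memory above is a deque-like buffer): deques do
# not support slicing, so islice is the usual way to take a contiguous window.
from collections import deque
from itertools import islice

memory = deque(range(10), maxlen=10)
assert list(islice(memory, 3, 6)) == [3, 4, 5]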
import gmpy2
from itertools import islice


def primes():
    n = 2
    while True:
        yield n
        n = gmpy2.next_prime(n)


prime_numbers = primes()
print(next(islice(prime_numbers, 10000, None)))
def test_WebLoader_two_tiers():
    wl = loader.WebLoader("testdata/sample.tgz", 90, fields="png cls".split())
    cls1 = [sample[1] for sample in itt.islice(wl, 0, 10)]
    cls2 = [sample[1] for sample in itt.islice(wl, 0, 10)]
    assert cls1 == cls2, (cls1, cls2)
def take_dict(n, iterable):
    "Return first n items of the iterable as a dict"
    return dict(islice(iterable, n))
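# Usage sketch (illustrative): take_dict expects an iterable of key/value pairs,
# e.g. dict.items().
scores = {'a': 1, 'b': 2, 'c': 3}
assert take_dict(2, iter(scores.items())) == {'a': 1, 'b': 2}  # insertion order (Python 3.7+)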
def partition(iterable, parts):
    return [
        list(islice(it, i, None, parts))
        for i, it in enumerate(tee(iterable, parts))
    ]
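# Usage sketch (illustrative): partition deals items out round-robin into
# `parts` lists.
assert partition(range(7), 3) == [[0, 3, 6], [1, 4], [2, 5]]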
def get_trees(*slice_args):
    pattern = os.path.join(DATA_PATH, '*.html')
    loader = webstruct.WebAnnotatorLoader()
    _trees_iter = webstruct.load_trees(pattern, loader)
    return list(islice(_trees_iter, *slice_args))
def get_type(arg: Iterable):
    """Return generic type checking first value."""
    return subtype(type(arg), *map(get_type, itertools.islice(arg, 1)))
def incrementing_payload(length):
    return bytearray(itertools.islice(itertools.cycle(range(256)), length))
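# Usage sketch (illustrative): the payload cycles through the byte values 0..255.
payload = incrementing_payload(260)
assert payload[:4] == bytearray([0, 1, 2, 3])
assert payload[256:260] == bytearray([0, 1, 2, 3])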
from itertools import islice


def fib():
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a + b


fib_list = list(islice(fib(), 10))
print(fib_list)
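# A further sketch (illustrative): islice also takes start/stop, so it can skip
# ahead in the infinite stream, e.g. Fibonacci numbers 10 through 14.
print(list(islice(fib(), 10, 15)))  # [55, 89, 144, 233, 377]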
import itertools with open("A-large.in", "rt") as infile: COUNT = int(infile.readline()) X_WIN_SET = {'T', 'X'} O_WIN_SET = {'T', 'O'} for i in range(1, COUNT + 1): rows = list(itertools.islice((line.rstrip() for line in infile), 4)) infile.readline() winner = None diagonal1 = set() diagonal2 = set() for j, row in enumerate(rows): diagonal1.add(row[j]) diagonal2.add(row[-(j + 1)]) if set(row) <= X_WIN_SET: winner = 'X' break if set(row) <= O_WIN_SET: winner = 'O' break if winner is None: for j, col in enumerate(zip(*rows)): if set(col) <= X_WIN_SET: winner = 'X'
def sitemap_xml_index(self): cr, uid, context = request.cr, openerp.SUPERUSER_ID, request.context ira = request.registry['ir.attachment'] iuv = request.registry['ir.ui.view'] mimetype = 'application/xml;charset=utf-8' content = None def create_sitemap(url, content): ira.create(cr, uid, dict( datas=content.encode('base64'), mimetype=mimetype, type='binary', name=url, url=url, ), context=context) sitemap = ira.search_read(cr, uid, [('url', '=', '/sitemap.xml'), ('type', '=', 'binary')], ('datas', 'create_date'), context=context) if sitemap: # Check if stored version is still valid server_format = openerp.tools.misc.DEFAULT_SERVER_DATETIME_FORMAT create_date = datetime.datetime.strptime(sitemap[0]['create_date'], server_format) delta = datetime.datetime.now() - create_date if delta < SITEMAP_CACHE_TIME: content = sitemap[0]['datas'].decode('base64') if not content: # Remove all sitemaps in ir.attachments as we're going to regenerated them sitemap_ids = ira.search(cr, uid, [('url', '=like', '/sitemap%.xml'), ('type', '=', 'binary')], context=context) if sitemap_ids: ira.unlink(cr, uid, sitemap_ids, context=context) pages = 0 first_page = None locs = request.website.enumerate_pages() while True: start = pages * LOC_PER_SITEMAP values = { 'locs': islice(locs, start, start + LOC_PER_SITEMAP), 'url_root': request.httprequest.url_root[:-1], } urls = iuv.render(cr, uid, 'website.sitemap_locs', values, context=context) if urls.strip(): page = iuv.render(cr, uid, 'website.sitemap_xml', dict(content=urls), context=context) if not first_page: first_page = page pages += 1 create_sitemap('/sitemap-%d.xml' % pages, page) else: break if not pages: return request.not_found() elif pages == 1: content = first_page else: # Sitemaps must be split in several smaller files with a sitemap index content = iuv.render(cr, uid, 'website.sitemap_index_xml', dict( pages=range(1, pages + 1), url_root=request.httprequest.url_root, ), context=context) create_sitemap('/sitemap.xml', content) return request.make_response(content, [('Content-Type', mimetype)])
def k_shortest_paths(G, source, target, k, weight=None):
    return list(
        islice(nx.shortest_simple_paths(G, source, target, weight=weight), k))
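# Usage sketch (assumes networkx is installed; the graph and node names are
# illustrative only).
import networkx as nx

G = nx.Graph()
G.add_edges_from([('a', 'b'), ('b', 'c'), ('a', 'c'), ('c', 'd')])
print(k_shortest_paths(G, 'a', 'd', k=2))  # e.g. [['a', 'c', 'd'], ['a', 'b', 'c', 'd']]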
Ostinato = [ 'r4', 'r8', 'r16', 'fis16', 'r16', 'fis\'8.', 'r8', 'r16', 'e\'16', '|', 'r2', 'r4', 'r8', 'r16', 'fis16', '|', 'r16', 'fis\'8.', 'r8', 'r16', 'e\'16', 'r2', '|' ] MelodyTones = [ 'fis\'', 'a', 'd\'', 'b', 'b', 'd\'', 'g', 'es\'', 'es\'', 'fis\'', 'h', 'gis', 'gis,', 'h,', 'e,', 'c,', 'c', 'es,', 'as,', 'e', 'e\'', 'g', 'c\'', 'as', 'as,', 'h', 'e', 'c' ] BassTones = ['e\'', 'a', 'f', 'g', 'e,', 'e\'', 'a'] Fundamental = ['fis'] f = Fundamental[0] t = BassTones[0] for s in itertools.islice(MelodyTones, 0, 4): barOne = s + '4\ppp^\\tasto ' + s + '8[ ' + s + '-.] ' + s + '4.-- ' + f + '8 | ' barTwo = s + '8[ ' + s + '-.] ' + s + '4.-- ' + s + '8 ' + t + '4 | ' barThree = t + '8[ ' + t + '8-.]' + t + '4.-- ' + t + '8 ' + t + '8[ ' + t + '] |' print(barOne, barTwo, barThree) t = BassTones[1] for s in itertools.islice(MelodyTones, 4, 8): barOne = s + '4\ppp^\\tasto ' + s + '8[ ' + s + '-.] ' + s + '4.-- ' + f + '8 | ' barTwo = s + '8[ ' + s + '-.] ' + s + '4.-- ' + s + '8 ' + t + '4 | ' barThree = t + '8[ ' + t + '8-.]' + t + '4.-- ' + t + '8 ' + t + '8[ ' + t + '] |' print(barOne, barTwo, barThree) t = BassTones[2] for s in itertools.islice(MelodyTones, 8, 12): barOne = s + '4\ppp^\\tasto ' + s + '8[ ' + s + '-.] ' + s + '4.-- ' + f + '8 | '
def load_link(link): try: page = requests.post(link, headers=header, data=data, timeout=30) except: print('SOME ERROR!!!!!!!!!!! RETRY AFTER 10 SEC') time.sleep(10) page = load_link(link) return page count = 0 with open('../../../data/minjust.csv', 'r', newline='') as file: rows = csv.reader(file, delimiter='|') for row in islice(rows,1, 10000): count += 1 print(count) if row[8] == 'ИНН' or row[8] == '': continue print(row[8]) if len(row[8]) != 0: dif = 14 - len(row[8]) inn = ('0'*dif) + row[8] else: inn = '' print(inn) zapros = load_link(f'https://oldbudget.okmot.kg/inn_expense?code={inn}')
def train(self, data_loader, **kwargs): self.model.train() self.mode = 'train' if isinstance(data_loader, list): assert len(data_loader) == 2 and self.search_optimizer is not None self.search_data_loader = data_loader[1] data_loader = data_loader[0] self.data_loader = data_loader max_global_step = self._max_epochs * len(data_loader) * self.batchsize global_step_left = max_global_step - get_global_step() self._max_iters = self.iter + global_step_left // self.batchsize inner_iter_left = len(data_loader) if self._checkpoint_batchsize and self._checkpoint_inner_iter: checkpoint_data_loader_len = \ len(data_loader) * self.batchsize // self._checkpoint_batchsize checkpoint_inner_iter_left = checkpoint_data_loader_len - self._checkpoint_inner_iter inner_iter_left = \ checkpoint_inner_iter_left * self._checkpoint_batchsize // self.batchsize if inner_iter_left < 0: self.logger.warn( "Data loader length {} < inner iter {} of checkpoint". format(checkpoint_data_loader_len, self._checkpoint_inner_iter)) if inner_iter_left == -1: inner_iter_left = 0 self.logger.warn("We assume the last epoch has finished." "Start from new epoch.") else: inner_iter_left = len(data_loader) self._checkpoint_batchsize = None do_seach = False search_data_iter = None if self.search_optimizer is not None: assert hasattr(self, 'tune_epoch_start') and hasattr( self, 'tune_epoch_end') do_seach = self.tune_epoch_end >= self.epoch + 1 >= self.tune_epoch_start if do_seach: search_data_iter = iter(self.search_data_loader) self.model.module.reset_do_search(True) else: self.model.module.reset_do_search(False) self._max_inner_iter = len(data_loader) set_total_inner_iter(self._max_inner_iter) self.call_hook('before_train_epoch') # alternate training params and arch-params bar = tqdm(islice(enumerate(data_loader), 0, inner_iter_left), total=inner_iter_left, ncols=70) for i, data_batch in bar: self._inner_iter = i + len(data_loader) - inner_iter_left set_inner_iter(self._inner_iter) bar_desc = getattr(self, 'task_name', 'none') if do_seach: bar_desc = bar_desc + " Searching" try: search_data_batch = next(search_data_iter) except StopIteration: search_data_iter = iter(self.search_data_loader) search_data_batch = next(search_data_iter) search_outputs = self.batch_processor(self.model, search_data_batch, train_mode=True, **kwargs) self.search_outputs = search_outputs self.call_hook('after_val_iter') bar.set_description(bar_desc) self.call_hook('before_train_iter') outputs = self.batch_processor(self.model, data_batch, train_mode=True, **kwargs) if not isinstance(outputs, dict): raise TypeError('batch_processor() must return a dict') if 'log_vars' in outputs: self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) self.log_buffer.update(get_summary()) self.outputs = outputs self.call_hook('after_train_iter') update_global_step(self.batchsize) self._iter += 1 self.call_hook('after_train_epoch') if do_seach: # it may cause undefined behavior if mmdetection adds hooks with 'after_val_epoch' self.call_hook('after_val_epoch') self._epoch += 1