def add(self, added=None):
    """Lay out one item, or a row of items, on a new line of the layout.

    ``added`` may be a single item or an iterable of items.  Every item is
    re-parented to ``self.layout``, made visible and moved so that the items
    sit side by side on the current line (``self.linesH``), keeping their own
    width and height.  Afterwards ``self.linesH`` grows by the tallest item
    plus 5 and ``self.linesW`` is raised to the widest line seen so far.

    Passing ``None`` (the default) is a no-op; previously it crashed while
    calling ``setParent`` on ``None``.
    """
    if added is None:
        return

    # Accept a bare item as well as any iterable of items.
    try:
        iter(added)
    except TypeError:
        added = [added]

    # NOTE(review): the line starts at margin[1] while the resize below pads
    # the width with margin[0] -- confirm the indices are not swapped.
    offsetX = self.__margin[1]
    offsetY = 0
    for item in added:
        item.setParent(self.layout)
        item.setVisible(True)
        geometry = item.geometry()
        w = geometry.width()
        h = geometry.height()
        item.setGeometry(offsetX, self.linesH, w, h)
        # Next item goes to the right of this one, 10 units apart.
        offsetX += w + 10
        if offsetY < h:
            offsetY = h

    # Advance to the next line and remember the widest line so far.
    self.linesH += offsetY + 5
    if self.linesW < offsetX:
        self.linesW = offsetX

    # Grow the layout so a surrounding scroll area can reach everything.
    if self.__scroll:
        self.layout.resize(self.linesW + self.__margin[0],
                           self.linesH + self.__margin[1])
def open_sam(sam_filename, samtype):
    """Open a SAM/BAM input and yield ``(pe_mode, read_seq)`` exactly once.

    :param sam_filename: path of the alignment file, or ``"-"`` for stdin.
    :param samtype: ``"sam"`` or ``"bam"``; anything else raises ValueError.

    ``pe_mode`` is the ``paired_end`` flag of the first read and ``read_seq``
    iterates over all reads.  For stdin the first read, which had to be
    consumed to peek at it, is chained back in front of the stream.

    The single ``yield`` suggests this generator is meant to be wrapped by
    ``contextlib.contextmanager`` -- the decorator is not visible here, so
    confirm against the caller.  Errors raised while reading the first record,
    or thrown into the generator at the ``yield``, are reported on stderr and
    re-raised.
    """
    if samtype == "sam":
        SAM_or_BAM_Reader = HTSeq.SAM_Reader
    elif samtype == "bam":
        SAM_or_BAM_Reader = HTSeq.BAM_Reader
    else:
        raise ValueError("Unknown input format %s specified." % samtype)

    try:
        if sam_filename != "-":
            read_seq_file = SAM_or_BAM_Reader(sam_filename)
            read_seq = read_seq_file
            # A file can be iterated again from the start, so peeking with a
            # throw-away iterator does not lose the first read.
            first_read = next(iter(read_seq))
        else:
            read_seq_file = SAM_or_BAM_Reader(sys.stdin)
            read_seq_iter = iter(read_seq_file)
            first_read = next(read_seq_iter)
            # stdin cannot be rewound: put the peeked read back in front.
            read_seq = itertools.chain([first_read], read_seq_iter)
        pe_mode = first_read.paired_end
    except Exception:
        msg = "Error occured when reading beginning of SAM/BAM file.\n"
        sys.stderr.write(msg)
        raise

    try:
        yield (pe_mode, read_seq)
    except Exception:
        # ``except Exception`` (not a bare except) so that closing the
        # generator (GeneratorExit) is not reported as a processing error.
        sys.stderr.write("Error occured when processing SAM input (%s):\n" %
                         read_seq_file.get_line_number_string())
        raise
def test_gen(self):
    """Exercise indexing, ``data`` and iteration of ``regen`` over iterators."""
    # Random-access reads in descending order force incremental consumption.
    g = regen(iter(list(range(10))))
    for index in (7, 6, 5, 4, 3, 2, 1, 0):
        assert g[index] == index
    # The original read ``assert g.data, list(range(10))``, which only checked
    # truthiness and used the list as the failure message.  The later
    # ``g.data == list(range(10))`` check after reading just two items shows
    # that ``data`` yields the full sequence, so compare for real.
    assert g.data == list(range(10))
    assert g[8] == 8
    assert g[0] == 0

    g = regen(iter(list(range(10))))
    assert g[0] == 0
    assert g[1] == 1
    assert g.data == list(range(10))

    # Indexing past the end of a one-item source raises IndexError.
    g = regen(iter([1]))
    assert g[0] == 1
    with pytest.raises(IndexError):
        g[1]
    assert g.data == [1]

    # Negative indices count from the end of the fully consumed data.
    g = regen(iter(list(range(10))))
    for offset in (1, 2, 3, 4, 5):
        assert g[-offset] == 10 - offset
    assert g[5] == 5
    assert g.data == list(range(10))

    assert list(iter(g)) == list(range(10))
def test_admin_setup(self):
    """PUT /origin/.prep succeeds on all-204 backends and raises otherwise."""

    def prep_request():
        # A fresh request (and fresh header dict) for every scenario.
        return Request.blank(
            '/origin/.prep',
            environ={'REQUEST_METHOD': 'PUT'},
            headers={'X-Origin-Admin-User': '******',
                     'X-Origin-Admin-Key': 'unittest'})

    # 102 canned 204 responses are queued; the handler is expected to make
    # exactly 101 backend calls.
    # NOTE(review): the old comment said "account and 16 .hash's", which does
    # not match these numbers -- confirm what the 101 calls are.
    self.test_origin.app = FakeApp(iter(
        [('204 No Content', {}, '') for _ in range(102)]))
    resp = prep_request().get_response(self.test_origin)
    self.assertEqual(resp.status_int, 204)
    self.assertEqual(self.test_origin.app.calls, 101)

    # A 404 on the very first backend call must surface as an exception.
    self.test_origin.app = FakeApp(iter(
        [('404 Not Found', {}, '')]))
    req = prep_request()
    self.assertRaises(Exception, req.get_response, self.test_origin)

    # So must a 404 on the second backend call.
    self.test_origin.app = FakeApp(iter(
        [('204 No Content', {}, ''), ('404 Not Found', {}, '')]))
    req = prep_request()
    self.assertRaises(Exception, req.get_response, self.test_origin)
def test_bison_lalr_repr_automaton_lr0():
    """Check repr() of the automaton built for the minimal LR(0) example.

    Covers the automaton itself, its list of state data, a state id, and the
    first action / goto stored on individual states.
    """
    ex = grammar_examples.lr0.ex_minimal1
    grammar = ex.grammar
    automaton = compute_automaton(grammar)
    assert repr(automaton) == '<Automaton with 4 states>'
    # The expected text is written with literal '•' and '∥' characters, which
    # are replaced by the module's _mdot / _parallel values before comparing.
    # NOTE(review): whitespace inside these literals may have been flattened
    # in this view -- verify against the real repr() output before relying on it.
    assert repr(automaton._data) == ''' [<StateData #0 with 1 actions, 1 gotos <bison.ItemSet #0, size 2 < $accept → • Root $eof ∥ > < Root → • term ∥ > >>, <StateData #1 with 1 actions, 0 gotos <bison.ItemSet #1, size 1 < Root → term • ∥ { $eof } > >>, <StateData #2 with 1 actions, 0 gotos <bison.ItemSet #2, size 1 < $accept → Root • $eof ∥ > >>, <StateData #3 with 2 actions, 0 gotos <bison.ItemSet #3, size 1 < $accept → Root $eof • ∥ > >>] '''.strip().replace('•', _mdot).replace('∥', _parallel)
    # repr() of a state id embeds the repr() of the state it refers to.
    assert repr(automaton._data[0]._id) == ''' <StateId for <StateData #0 with 1 actions, 1 gotos <bison.ItemSet #0, size 2 < $accept → • Root $eof ∥ > < Root → • term ∥ > >>> '''.strip().replace('•', _mdot).replace('∥', _parallel)
    # Only the first stored action / goto of each inspected state is checked.
    assert repr(next(iter(automaton._data[0]._actions.values()))) == 'Shift(<state 1>)'
    assert repr(next(iter(automaton._data[1]._actions.values()))) == 'Reduce(<rule 1>)'
    assert repr(next(iter(automaton._data[0]._gotos.values()))) == 'Goto(<state 2>)'
def __reduce__(self):
    """Return the 5-item reduce tuple: no args, empty state, two list items."""
    factory = PickleProtocol2ReduceListitemsAppend  # callable
    ctor_args = ()                                  # args
    state = {}                                      # state
    listitems = iter(['foo', 'bar'])                # listitems
    dictitems = iter([])                            # dictitems
    return (factory, ctor_args, state, listitems, dictitems)
def select_map(self, latitude, longitude):
    """Select and display a track near the given position.

    A nearby track whose ``track_id`` is among the saved track selections is
    preferred; otherwise the first nearby track is used.

    :param latitude: latitude of the position
    :type latitude: float
    :param longitude: longitude of the position
    :type longitude: float
    :returns: the selected track, or None if there are no nearby tracks
    :rtype: Track
    """
    # Any falsy coordinate (None, but also 0 / 0.0) counts as "no position".
    # NOTE(review): 0.0 is a legitimate latitude/longitude -- confirm that
    # treating it as missing is intended.
    if not (latitude and longitude):
        return None

    location = GeoPoint.fromPoint(latitude, longitude)
    nearby_tracks = self.track_manager.find_nearby_tracks(location)
    saved_tracks = self.get_pref_track_selections()

    preferred = [candidate for candidate in nearby_tracks
                 if candidate.track_id in saved_tracks]
    if preferred:
        track = preferred[0]
    else:
        # No saved selection nearby: fall back to any nearby track.
        track = next(iter(nearby_tracks), None)

    # Only update the track map when the selection actually changes.
    if self.track != track:
        self._select_track(track)
    return track
def __reduce__(self):
    """Return the 5-item reduce tuple, rebuilding via the instance's class."""
    factory = type(self)      # callable
    ctor_args = ('yam', 1)    # args
    state = {'foo': 1}        # state
    listitems = iter([])      # listitems
    dictitems = iter([])      # dictitems
    return (factory, ctor_args, state, listitems, dictitems)
def __reduce__(self):
    """Return the 5-item reduce tuple, rebuilding via the named class."""
    factory = PickleProtocol2ReduceTuple  # callable
    ctor_args = ('yam', 1)                # args
    state = {'foo': 1}                    # state
    listitems = iter([])                  # listitems
    dictitems = iter([])                  # dictitems
    return (factory, ctor_args, state, listitems, dictitems)
def __reduce__(self):
    """Return the 5-item reduce tuple, rebuilding via a function, no state."""
    factory = protocol_2_reduce_tuple_func  # callable
    ctor_args = ('yam', 1)                  # args
    state = None                            # state
    listitems = iter([])                    # listitems
    dictitems = iter([])                    # dictitems
    return (factory, ctor_args, state, listitems, dictitems)
def __reduce__(self):
    """Return the 5-item reduce tuple in ``__newobj__`` form, no state."""
    factory = __newobj__                                   # callable
    ctor_args = (PickleProtocol2ReduceNewobj, 'yam', 1)    # args
    state = None                                           # state
    listitems = iter([])                                   # listitems
    dictitems = iter([])                                   # dictitems
    return (factory, ctor_args, state, listitems, dictitems)
def dfs(G, source=None):
    """Yield ``(parent, child, kind)`` edges of an iterative depth-first search.

    ``kind`` is ``'tree'`` when ``child`` is reached for the first time and
    ``'back'`` when it had already been visited.  With ``source=None`` every
    node of ``G`` is used as a potential start, so all components are
    covered; otherwise only the part reachable from ``source`` is searched.
    """
    # Slightly modified version of the DFS procedure from networkx.
    roots = G if source is None else [source]
    seen = set()
    for root in roots:
        if root in seen:
            continue
        seen.add(root)
        # Each stack frame keeps a node and the not-yet-explored neighbours.
        pending = [(root, iter(G[root]))]
        while pending:
            node, neighbours = pending[-1]
            for neighbour in neighbours:
                if neighbour in seen:
                    yield node, neighbour, 'back'
                    continue
                yield node, neighbour, 'tree'
                seen.add(neighbour)
                pending.append((neighbour, iter(G[neighbour])))
                break  # descend into the freshly discovered node
            else:
                # Neighbours exhausted: backtrack.
                pending.pop()
def train(self, inp, out, training_weight=1.):
    """Run one gradient step on a single sample and return its error.

    :param inp: input values (converted to a column matrix).
    :param out: target values (converted to a column matrix).
    :param training_weight: scales both the update step and the returned
        error.
    :returns: ``0.5 * training_weight`` times the dot product of the first
        row of ``(output - target)`` with itself, computed *before* the
        weights are updated.

    The ``(weight, bias)`` matrices in ``self.__weights`` are updated in
    place.
    """
    # np.asmatrix is what np.mat aliased; np.mat itself is gone in NumPy 2.
    inp = np.asmatrix(inp).T
    out = np.asmatrix(out).T

    deriv = []
    val = inp
    vals = [val]
    # Forward calculation of activations and derivatives.
    # NOTE(review): the next layer is fed ``val`` (the pre-activation), not
    # the activation appended to ``vals`` -- correct only if
    # ``__activation`` works in place; confirm.
    for weight, bias in self.__weights:
        val = weight * val
        val += bias
        deriv.append(self.__derivative(val))
        vals.append(self.__activation(val))

    deriv = iter(reversed(deriv))
    weights = iter(reversed(self.__weights))
    # Error at the output layer.
    errs = [np.multiply(vals[-1] - out, next(deriv))]
    # Backwards propagation of errors; ``deriv`` is one element shorter than
    # ``weights`` by now, so the first layer's weights are not used here.
    for (w, b), d in zip(weights, deriv):
        errs.append(np.multiply(np.dot(w.T, errs[-1]), d))

    # ``errs`` was collected output-to-input; walk it input-to-output so it
    # lines up with the layers and their input activations.
    for (w, b), v, e in zip(self.__weights, vals, reversed(errs)):
        e *= self.__learning_rate * training_weight
        w -= e * v.T
        b -= e

    tmp = vals[-1] - out
    return np.dot(tmp[0].T, tmp[0]) * .5 * training_weight
def _isnotsuite(test):
    "A crude way to tell apart testcases and suites with duck-typing"
    # Suites are iterable, individual test cases are not.
    iterable = True
    try:
        iter(test)
    except TypeError:
        iterable = False
    return not iterable
def test_count():
    """``count`` reports the number of items for sequences and iterators."""
    cases = [
        ((1, 2, 3), 3),
        ([], 0),
        (iter((1, 2, 3, 4)), 4),
        ("hello", 5),
        (iter("hello"), 5),
    ]
    for iterable, expected in cases:
        assert count(iterable) == expected
def create_file(self):
    """Write a BED file of MPBS regions labelled by TFBS-summit overlap.

    Summit regions read from ``self.tfbs_summit_fname`` are re-centred on
    their summit and widened to ``self.peak_ext``.  Regions read from
    ``self.mpbs_fname`` then get ``:Y`` appended to the part of their name
    before the first ``:`` when they come out of the intersection with the
    summit regions, and ``:N`` when they come out of the subtraction.  The
    result is written to ``<output_location>/<mpbs_name>.bed``.
    """
    # Floor division keeps coordinates integral on Python 3 as well; with an
    # integer ``peak_ext`` this is what ``/`` produced on Python 2.
    half_ext = self.peak_ext // 2

    # Expanding summits
    tfbs_summit_regions = GenomicRegionSet("TFBS Summit Regions")
    tfbs_summit_regions.read_bed(self.tfbs_summit_fname)
    for region in tfbs_summit_regions:
        # The last whitespace-separated field of ``data`` is added to the
        # region start to obtain the summit position.
        summit = int(region.data.split()[-1]) + region.initial
        region.initial = max(summit - half_ext, 0)
        region.final = summit + half_ext

    # Calculating intersections
    mpbs_regions = GenomicRegionSet("MPBS Regions")
    mpbs_regions.read_bed(self.mpbs_fname)
    tfbs_summit_regions.sort()
    mpbs_regions.sort()
    with_overlap_regions = mpbs_regions.intersect(
        tfbs_summit_regions, mode=OverlapType.ORIGINAL)
    without_overlap_regions = mpbs_regions.subtract(
        tfbs_summit_regions, whole_region=True)

    tfbs_regions = GenomicRegionSet("TFBS Regions")
    for regions, suffix in ((with_overlap_regions, ":Y"),
                            (without_overlap_regions, ":N")):
        for region in regions:
            region.name = region.name.split(":")[0] + suffix
            tfbs_regions.add(region)
    tfbs_regions.sort()

    tfbs_fname = os.path.join(self.output_location,
                              "{}.bed".format(self.mpbs_name))
    tfbs_regions.write_bed(tfbs_fname)
def run(self):
    """Rip the track or playlist named by the link in ``sys.argv[3]``.

    Waits for the container to be loaded, resolves the link to one or more
    tracks and, for each track, loads it, plays it until ``end_of_track`` is
    signalled and finalises the rip.  The ripper is disconnected at the end.

    A link that is neither a track nor a playlist used to fail with an
    unbound ``itrack`` before disconnecting; it is now reported and treated
    as "nothing to rip".
    """
    container_loaded.wait()
    container_loaded.clear()

    link = Link.from_string(sys.argv[3])
    link_type = link.type()
    if link_type == Link.LINK_TRACK:
        tracks = iter([link.as_track()])
    elif link_type == Link.LINK_PLAYLIST:
        playlist = link.as_playlist()
        print('loading playlist...')
        # Poll until the playlist contents are available.
        while not playlist.is_loaded():
            time.sleep(0.1)
        print('done')
        tracks = iter(playlist)
    else:
        print('unsupported link type, nothing to rip')
        tracks = iter([])

    session = self.ripper.session
    for track in tracks:
        self.ripper.load_track(track)
        rip_init(session, track)
        self.ripper.play()
        # Block until playback of this track has finished.
        end_of_track.wait()
        end_of_track.clear()
        rip_terminate(session, track)
        rip_id3(session, track)

    self.ripper.disconnect()
async def async_step_import(self, user_input):
    """Import a config entry.

    Aborts when an entry for DOMAIN already exists.  Otherwise the scan
    interval (and the host, unless it equals DOMAIN) is remembered, and the
    stored session file is consulted: if the file is missing, empty, or its
    first key is not the requested host, the flow continues with the user
    step; else an entry is created from the first stored session.
    """
    if self.hass.config_entries.async_entries(DOMAIN):
        return self.async_abort(reason='already_setup')

    self._scan_interval = user_input[KEY_SCAN_INTERVAL]
    if user_input[CONF_HOST] != DOMAIN:
        self._hosts.append(user_input[CONF_HOST])

    config_path = self.hass.config.path(TELLDUS_CONFIG_FILE)
    if not await self.hass.async_add_executor_job(
            os.path.isfile, config_path):
        return await self.async_step_user()

    conf = await self.hass.async_add_executor_job(load_json, config_path)
    if not conf:
        # An empty file has no host to match; without this guard the
        # next(iter(...)) below would raise StopIteration inside a
        # coroutine, which surfaces as RuntimeError.
        return await self.async_step_user()

    # Only the first stored host/session pair is considered.
    host, session = next(iter(conf.items()))
    if user_input[CONF_HOST] != host:
        return await self.async_step_user()

    host = CLOUD_NAME if host == 'tellduslive' else host
    return self.async_create_entry(
        title=host,
        data={
            CONF_HOST: host,
            KEY_SCAN_INTERVAL: self._scan_interval.seconds,
            KEY_SESSION: session,
        })
def load_transactions_mock(input_file, **kwargs):
    """ Mock for apyori.load_transactions. """
    # The delimiter must be forwarded unchanged.
    eq_(kwargs['delimiter'], delimiter)
    # For each of the two expected inputs: check the next line handed in,
    # then hand back the matching canned transaction as an iterator.
    for position in (0, 1):
        eq_(next(input_file), inputs[position])
        yield iter(input_transactions[position])
def setUp(self):
    """Build id pairs, record pairs, a one-field data model and expected scores."""
    random.seed(123)

    def person(name, age):
        # A fresh dict per call, so no record object is shared between pairs.
        return {'name': name, 'age': age}

    self.ids_str = iter([
        ('1', '2'),
        ('2', '3'),
        ('4', '5'),
        ('6', '7'),
        ('8', '9'),
    ])
    self.records = iter([
        (person('Margret', '32'), person('Marga', '33')),
        (person('Marga', '33'), person('Maria', '19')),
        (person('Maria', '19'), person('Monica', '39')),
        (person('Monica', '39'), person('Mira', '47')),
        (person('Mira', '47'), person('Mona', '9')),
    ])

    self.normalizedAffineGapDistance = dedupe.affinegap.normalizedAffineGapDistance

    # Data model with a single string field, 'name', plus a bias term.
    name_field = {
        'Has Missing': False,
        'type': 'String',
        'comparator': self.normalizedAffineGapDistance,
        'weight': -1.0302742719650269,
    }
    fields = dedupe.core.OrderedDict()
    fields['name'] = name_field
    self.data_model = {'fields': fields, 'bias': 4.76}

    score_dtype = [('pairs', 'S1', 2), ('score', 'f4', 1)]
    expected_rows = [
        (['1', '2'], 0.96),
        (['2', '3'], 0.96),
        (['4', '5'], 0.78),
        (['6', '7'], 0.72),
        (['8', '9'], 0.84),
    ]
    self.desired_scored_pairs = numpy.array(expected_rows, dtype=score_dtype)
def set_current_draw_pattern(self, pattern, control):
    """Store ``pattern`` as ``self.draw_pattern``, wrapping non-iterables in a list.

    ``control`` is accepted but not used by this method.
    """
    try:
        iter(pattern)
    except TypeError:
        # A single, non-iterable pattern becomes a one-element list.
        pattern = [pattern]
    self.draw_pattern = pattern
def GetEntries(self, parser_mediator, match=None, **unused_kwargs):
    """Extract device information from the iPod plist.

    One event is produced per device that has a truthy 'Connected' value;
    every other key of the device becomes an attribute of the event data
    (lower-cased, spaces replaced by underscores).

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      match (Optional[dict[str: object]]): keys extracted from PLIST_KEYS.
          When omitted or empty, no events are produced.
    """
    # ``match`` defaults to None; guard it instead of failing on None.get().
    devices = match.get('Devices', {}) if match else {}
    for device_identifier, device_information in devices.items():
        datetime_value = device_information.get('Connected', None)
        if not datetime_value:
            continue

        event_data = IPodPlistEventData()
        event_data.device_id = device_identifier

        # TODO: refactor.
        for key, value in device_information.items():
            if key == 'Connected':
                # Already used as the event timestamp.
                continue
            attribute_name = key.lower().replace(' ', '_')
            setattr(event_data, attribute_name, value)

        event = time_events.PythonDatetimeEvent(
            datetime_value, definitions.TIME_DESCRIPTION_LAST_CONNECTED)
        parser_mediator.ProduceEventWithEventData(event, event_data)
def tag_info_chart(self):
    """ Make the taginfo.txt plot """

    ## TODO: human chrs on hg19. How will this work with GRCh genome or other, non human, genomes?
    # nice if they are ordered by size
    # Build the lists with ``+``: ``list.append`` returns None (and would
    # nest the extra names as a single element), so the old code always
    # passed ``None`` as the category list.
    ucsc = ["chr" + str(i) for i in range(1, 23)] + ["chrX", "chrY", "chrM"]
    # Names are strings so they can match the keys of the tag-info data; the
    # "chr" substring test below shows those keys are strings.
    ensembl = [str(i) for i in range(1, 23)] + ["X", "Y", "MT"]

    pconfig = {
        'id': 'tagInfo',
        'title': 'Homer: Tag Info Distribution',
        'ylab': 'Tags',
        'cpswitch_counts_label': 'Number of Tags'
    }

    ## check if chromosomes starts with "chr" (UCSC) or "#" (ensembl)
    # Only the first chromosome name of the first sample is inspected.
    sample1 = next(iter(self.tagdir_data['taginfo_total']))
    chrFormat = next(iter(self.tagdir_data['taginfo_total'][sample1]))

    if "chr" in chrFormat:
        chrs = ucsc
    else:
        chrs = ensembl

    return bargraph.plot(self.tagdir_data['taginfo_total'], chrs, pconfig)
def __init__(self, iterator, unit=None, computer_prefix=None, display=MULTI_LINE):
    """Create a new progress display.

    'iterator' is the iterator containing the work to be done.

    'unit' is the unit to be displayed to the user.

    'computer_prefix' should be set to True if this unit requires prefix
    increments of 1024 instead of the traditional 1000. If it is not set,
    then the class tries to guess based on 'unit'.

    'display' defaults to MULTI_LINE to print a new line for every update,
    or can be SINGLE_LINE to keep updating a single status line.
    """
    if not hasattr(iterator, "__len__"):
        # No known length: materialise the work items so they can be
        # counted.  (Replaces a hand-written append loop that also shadowed
        # the ``list`` builtin.)
        iterator = list(iterator)

    # This may be an expensive operation, for instance on a
    # hypothetical os.walk() which implements __len__.
    length = len(iterator)
    self.iterator = iter(iterator)

    self.progress = Progress(length, unit, computer_prefix)
    self.display = display

    # The first call to next is before the work actually starts, so we
    # shouldn't increment() at that point.
    self.first = True
def __iter__(self):
    """Yield ``(r1, r2)`` read pairs from both readers, checking they match.

    Raises FormatError when one reader has more reads than the other or when
    the names of a pair differ.  Names are compared by their first
    whitespace-separated token, with a trailing ``/x`` suffix removed.
    """

    def pair_key(read):
        token = read.name.split(None, 1)[0]
        if token[-2:-1] == '/':
            token = token[:-2]
        return token

    # zip() is avoided on purpose: it would consume one item too many.
    exhausted = object()
    first_reads, second_reads = iter(self.reader1), iter(self.reader2)
    while True:
        r1 = next(first_reads, exhausted)
        if r1 is exhausted:
            # End of file 1. Make sure that file 2 is also at end.
            if next(second_reads, exhausted) is not exhausted:
                raise FormatError("Reads are improperly paired. There are more reads in file 2 than in file 1.")
            return

        r2 = next(second_reads, exhausted)
        if r2 is exhausted:
            raise FormatError("Reads are improperly paired. There are more reads in file 1 than in file 2.")

        name1 = pair_key(r1)
        name2 = pair_key(r2)
        if name1 != name2:
            raise FormatError("Reads are improperly paired. Read name '{0}' in file 1 not equal to '{1}' in file 2.".format(name1, name2))
        yield (r1, r2)
def test_cdn_get_no_content(self):
    """HEAD through the CDN passes 304, 404 and 416 object statuses through."""
    prev_data = json.dumps({'account': 'acc', 'container': 'cont',
                            'ttl': 1234, 'logs_enabled': True,
                            'cdn_enabled': True})
    # The same scenario is run once per backend status for the object call.
    for status in (304, 404, 416):
        self.test_origin.app = FakeApp(iter([
            ('204 No Content', {}, prev_data),        # call to _get_cdn_data
            ('%d No Content' % status, {}, '')]))     # call to get obj
        req = Request.blank(
            'http://1234.r34.origin_cdn.com:8080/obj1.jpg',
            environ={'REQUEST_METHOD': 'HEAD',
                     'swift.cdn_hash': 'abcd',
                     'swift.cdn_object_name': 'obj1.jpg'})
        resp = req.get_response(self.test_origin)
        self.assertEqual(resp.status_int, status)
def test_cdn_get_regex(self):
    """GET via a CDN host name with a hash reaches the object; without one it is a 404."""
    prev_data = json.dumps({'account': 'acc', 'container': 'cont',
                            'ttl': 1234, 'logs_enabled': True,
                            'cdn_enabled': True})

    def check_urls(req):
        # The backend request must address acc/cont/obj1.jpg.
        vrs, acc, cont, obj = utils.split_path(req.path, 1, 4)
        self.assertEqual(acc, 'acc')
        self.assertEqual(cont, 'cont')
        self.assertEqual(obj, 'obj1.jpg')

    scenarios = (
        ('http://1234.r3.origin_cdn.com:8080/obj1.jpg', 304),
        ('http://r3.origin_cdn.com:8080/nohash/obj1.jpg', 404),
    )
    for url, expected_status in scenarios:
        self.test_origin.app = FakeApp(iter([
            ('204 No Content', {}, prev_data),           # call to _get_cdn_data
            ('304 No Content', {}, '', check_urls)]))    # call to get obj
        req = Request.blank(url, environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.test_origin)
        self.assertEqual(resp.status_int, expected_status)
def open_moinpage_part(self, elem):
    """Render a moin_page "part" element as reStructuredText.

    The element's content type is split on ';'.  For "x-moin/macro" a
    substitution reference is returned and its definition is appended to
    ``self.objects``; for "x-moin/format" a ``.. parser:`` block is
    returned; otherwise the element's alt text plus a newline is returned.
    """
    # NOTE(review): ``type`` shadows the builtin, and ``.next()`` is the
    # Python 2 iterator method.
    type = elem.get(moin_page.content_type, u"").split(u';')
    if len(type) == 2:
        if type[0] == u"x-moin/macro":
            # The name after '=' in the second content-type part is used as
            # the macro name; arguments come from the first child when that
            # child is tagged "arguments".
            if len(elem) and iter(elem).next().tag.name == "arguments":
                alt = u"<<{0}({1})>>".format(type[1].split(u'=')[1], u','.join( [u''.join(c.itertext()) for c in iter(elem).next() if c.tag.name == "argument"]))
            else:
                alt = u"<<{0}()>>".format(type[1].split(u'=')[1])
            obj = u".. |{0}| macro:: {1}".format(alt, alt)
            self.objects.append(obj)
            return u" |{0}| ".format(alt)
        elif type[0] == u"x-moin/format":
            elem_it = iter(elem)
            ret = u"\n\n.. parser:{0}".format(type[1].split(u'=')[1])
            # This test advances elem_it past the first child whenever elem
            # has children.
            if len(elem) and elem_it.next().tag.name == "arguments":
                args = []
                # A fresh iterator re-reads the first ("arguments") child.
                for arg in iter(elem).next():
                    if arg.tag.name == "argument":
                        args.append(u"{0}=\"{1}\"".format(arg.get(moin_page.name, u""), u' '.join(arg.itertext())))
                ret = u'{0} {1}'.format(ret, u' '.join(args))
                # NOTE(review): presumably the body is the child following
                # "arguments"; the nesting of this line is inferred -- confirm
                # against the original file.
                elem = elem_it.next()
            ret = u"{0}\n {1}".format(ret, u' '.join(elem.itertext()))
            return ret
    return elem.get(moin_page.alt, u'') + u"\n"
def test_origin_db_post_fail(self):
    """A container PUT answers 500 when any backend write step fails."""
    ok = '204 No Content'
    missing = '404 Not Found'
    # Backend statuses per scenario, in call order:
    #   call to _get_cdn_data, put to .hash,
    #   HEAD check to list container, PUT to list container
    scenarios = (
        (ok, missing),
        (ok, ok, missing, missing),
        (ok, ok, ok, missing),
    )
    for statuses in scenarios:
        self.test_origin.app = FakeApp(iter(
            [(status, {}, '') for status in statuses]))
        req = Request.blank('http://origin_db.com:8080/v1/acc/cont',
                            environ={'REQUEST_METHOD': 'PUT'})
        resp = req.get_response(self.test_origin)
        self.assertEqual(resp.status_int, 500)
def AnalyseResult(l_download):
    """Log a summary of download results and record an overall PASS/FAIL.

    Each entry of ``l_download`` is indexed as ``entry[0]`` (status) and
    ``entry[1]`` (duration).  Statuses other than 'success' and 'fail' are
    counted as timeouts.  Nothing is logged for an empty list.  Any exception
    is caught and printed instead of propagating.
    """
    success_download_count, fail_download_count, timeout_download_count = 0, 0, 0
    try:
        if len(l_download) == 0:
            return

        statuses = [entry[0] for entry in l_download]
        success_download_count = statuses.count('success')
        fail_download_count = statuses.count('fail')
        timeout_download_count = (len(statuses) - success_download_count
                                  - fail_download_count)
        cost_of_valid_download = [entry[1] for entry in l_download
                                  if entry[0] == 'success']

        # summary
        logger.info('-'*30 + "SUMMARY" + '-'*30)
        logger.info('Total Download: %s, Success: %s, Fail: %s, Timeout: %s'
                    % (len(l_download), success_download_count,
                       fail_download_count, timeout_download_count))
        if not len(cost_of_valid_download):
            logger.error('\tNone valid download!!!')
        else:
            logger.info('\tThe fastest download in %s seconds' % min(cost_of_valid_download))
            logger.info('\tThe slowest download in %s seconds' % max(cost_of_valid_download))
            logger.info('\tThe average download in %s seconds' % str(sum(cost_of_valid_download)/len(cost_of_valid_download)))

        # Overall verdict: pass only when nothing failed or timed out.
        if fail_download_count == 0 and timeout_download_count == 0:
            ResultLog.info('PASS')
        else:
            ResultLog.error('FAIL')
    except Exception as err:
        print(err)
# Dataset & Dataloader
# MNIST train/test splits, converted to tensors; the training split is
# downloaded into ./data when missing.
train_dataset = torchvision.datasets.MNIST(
    root="./data", train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = torchvision.datasets.MNIST(
    root="./data", train=False, transform=transforms.ToTensor()
)

# Only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

# look at one batch of data, using iter we can see one batch of data
examples = iter(train_loader)
# Use the builtin next(): Python 3 iterators have no ``.next()`` method.
samples, labels = next(examples)
print(samples.shape, labels.shape)

# Draw the first five samples of the batch on a 3x2 grid.
for i in range(5):
    plt.subplot(3, 2, i + 1)
    plt.imshow(samples[i][0])
# plt.show()


# model building
class DigitNet(nn.Module):
    """Network for the digit data; only the layers created below are visible."""

    def __init__(self, input_size, hidden_size, output_classes):
        super(DigitNet, self).__init__()
        # NOTE(review): ``output_classes`` is not used by the code visible
        # here, and the layers are moved to CUDA unconditionally.
        self.layer1 = nn.Linear(input_size, hidden_size).cuda()
        self.relu = nn.ReLU().cuda()
def md5sum(filename, block_size=65536):
    """Return the hexadecimal MD5 digest of the file at ``filename``.

    The file is read in chunks of ``block_size`` bytes, so large files are
    never held in memory as a whole.
    """
    my_hash = hashlib.md5()
    # Plain "rb": hashing only reads, so read-only files must work too.
    with open(filename, "rb") as f:
        # The sentinel has to be bytes: at end of file a binary read returns
        # b"", which never equals "" on Python 3 and used to loop forever.
        for block in iter(lambda: f.read(block_size), b""):
            my_hash.update(block)
    return my_hash.hexdigest()
def scanlist(iprange, portranges, methods):
    """Yield every ``(ip, port, method)`` combination to scan.

    Addresses vary slowest and methods fastest: for each address every port
    of every port range is visited, and for each port every method.
    """
    targets = (
        (address, port, method)
        for address in iprange
        for ports in portranges
        for port in ports
        for method in methods
    )
    for target in targets:
        yield target
name = '_sip._' + proto + '.' + domainname + '.' try: log.debug('trying to resolve SRV for %s' % name) ans = dns.resolver.query(name, 'SRV') except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer), err: log.info('Could not resolve %s' % name) continue for a in ans.response.answer: log.info('got an answer %s' % a.to_text()) for _tmp in a: for method in methods: try: hostname = socket.gethostbyname( _tmp.target.to_text()) except socket.error: log.warn("%s could not be resolved" % _tmp.target.to_text()) continue log.debug("%s resolved to %s" % (_tmp.target.to_text(), hostname)) yield (hostname, _tmp.port, method) if __name__ == '__main__': print getranges('1.1.1.1/24') seq = getranges('google.com/24') if seq is not None: a = ip4range(seq) for x in iter(a): print x
def test_overfit_batch_limits(tmpdir):
    """Check batch limits from ``overfit_batches`` and ``limit_*_batches``.

    Covers the train loader as well as the val/test loaders, and verifies
    that every loader handed out by the trainer starts with the same first
    batch as the unshuffled training data.
    """
    # ------------------------------------------------------
    # Make sure shuffle is correct across loaders initially
    # ------------------------------------------------------
    model = EvalModelTemplate()
    model.train_dataloader()

    # original train loader which should be replaced in all methods
    train_loader = model.train_dataloader()

    # make sure the val and tests are not shuffled
    assert isinstance(train_loader.sampler, RandomSampler)
    assert isinstance(model.val_dataloader().sampler, SequentialSampler)
    assert isinstance(model.test_dataloader().sampler, SequentialSampler)

    # ------------------------------------------------------
    # get the training loader and batch
    # ------------------------------------------------------
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=False)
    full_train_samples = len(train_loader)
    num_train_samples = int(0.11 * full_train_samples)

    (xa, ya) = next(iter(train_loader))

    def assert_first_batch_matches(loader):
        # make sure the loaders are the same
        (xb, yb) = next(iter(loader))
        assert torch.eq(xa, xb).all()
        assert torch.eq(ya, yb).all()

    # ------------------------------------------------------
    # set VAL and Test loaders
    # ------------------------------------------------------
    val_loader = DataLoader(model.val_dataloader().dataset, shuffle=False)
    test_loader = DataLoader(model.test_dataloader().dataset, shuffle=False)

    # set the model loaders
    model.train_dataloader = lambda: train_loader
    model.val_dataloader = lambda: val_loader
    model.test_dataloader = lambda: test_loader

    # ------------------------------------------------------
    # test train loader applies correct limits
    # ------------------------------------------------------
    trainer = Trainer(overfit_batches=4)
    trainer.reset_train_dataloader(model)
    assert trainer.num_training_batches == 4
    assert_first_batch_matches(trainer.train_dataloader)

    trainer = Trainer(overfit_batches=0.11)
    trainer.reset_train_dataloader(model)
    assert trainer.train_dataloader is train_loader
    assert trainer.num_training_batches == num_train_samples
    assert_first_batch_matches(trainer.train_dataloader)

    # ------------------------------------------------------
    # run tests for both val and test
    # ------------------------------------------------------
    for split, eval_loader in (('val', val_loader), ('test', test_loader)):

        # ------------------------------------------------------
        # test overfit_batches as percent
        # ------------------------------------------------------
        loader_num_batches, dataloaders = Trainer(overfit_batches=0.11)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == num_train_samples

        # make sure we turned off shuffle for the user
        assert isinstance(dataloaders[0].sampler, SequentialSampler)
        assert_first_batch_matches(dataloaders[0])

        # ------------------------------------------------------
        # test overfit_batches as int
        # ------------------------------------------------------
        for batch_count in (1, 5):
            loader_num_batches, dataloaders = Trainer(overfit_batches=batch_count)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == batch_count

        # ------------------------------------------------------
        # test limit_xxx_batches as percent AND int
        # ------------------------------------------------------
        # Resolves to limit_val_batches or limit_test_batches.
        limit_kwarg = 'limit_{}_batches'.format(split)

        loader_num_batches, dataloaders = Trainer(**{limit_kwarg: 0.1})._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == int(0.1 * len(eval_loader))

        loader_num_batches, dataloaders = Trainer(**{limit_kwarg: 10})._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == 10
def __init__(self, stream):
    """Wrap ``stream`` in an iterator and start with an empty buffer at position 0."""
    source = iter(stream)
    self.stream = source
    self.buffer, self.pos = [], 0
def __iter__(self):
    """Iterate over ``self.children``."""
    children = self.children
    return iter(children)
def read_lines(file, amount, fillvalue=None):
    """Group the items of ``file`` into tuples of ``amount`` items each.

    The last tuple is padded with ``fillvalue`` when the input runs out.
    """
    # Every slot pulls from the same iterator, so each output tuple takes
    # ``amount`` consecutive items.
    shared = iter(file)
    slots = [shared for _ in range(amount)]
    return zip_longest(*slots, fillvalue=fillvalue)
def sortedrun(self, *args, **kwargs):
    """Return an iterator over the results of ``run`` ordered by ``starttime``.

    All positional and keyword arguments are forwarded to ``self.run``.
    """
    def by_start(entry):
        return entry.starttime

    ordered = list(self.run(*args, **kwargs))
    ordered.sort(key=by_start)
    return iter(ordered)
def __iter__(self):
    """Iterate over ``self.variables``."""
    variables = self.variables
    return iter(variables)
def _largest_common_subgraph(self, candidates, constraints, to_be_mapped=None):
    """
    Find all largest common subgraphs honoring constraints.

    This is a recursive generator.  ``to_be_mapped`` is a set of frozensets
    of subgraph nodes, defaulting to a single frozenset holding all nodes of
    ``self.subgraph``.  Isomorphisms are yielded for each node set in turn;
    when none is found and the node sets hold more than one node, the
    function recurses on node sets that are one node smaller.
    """
    if to_be_mapped is None:
        to_be_mapped = {frozenset(self.subgraph.nodes)}

    # The LCS problem is basically a repeated subgraph isomorphism problem
    # with smaller and smaller subgraphs. We store the nodes that are
    # "part of" the subgraph in to_be_mapped, and we make it a little
    # smaller every iteration.

    # pylint disable because it's guarded against by default value
    # The size of the first node set is used as the size of all of them.
    current_size = len(
        next(iter(to_be_mapped), [])
    )  # pylint: disable=stop-iteration-return

    found_iso = False
    if current_size <= len(self.graph):
        # There's no point in trying to find isomorphisms of
        # graph >= subgraph if subgraph has more nodes than graph.

        # Try the isomorphism first with the nodes with lowest ID. So sort
        # them. Those are more likely to be part of the final
        # correspondence. This makes finding the first answer(s) faster. In
        # theory.
        for nodes in sorted(to_be_mapped, key=sorted):
            # Find the isomorphism between subgraph[to_be_mapped] <= graph
            next_sgn = min(nodes, key=lambda n: min(candidates[n], key=len))
            isomorphs = self._map_nodes(
                next_sgn, candidates, constraints, to_be_mapped=nodes
            )

            # This is effectively `yield from isomorphs`, except that we look
            # whether an item was yielded.
            try:
                item = next(isomorphs)
            except StopIteration:
                pass
            else:
                yield item
                yield from isomorphs
                found_iso = True

    # BASECASE
    if found_iso or current_size == 1:
        # Shrinking has no point because either 1) we end up with a smaller
        # common subgraph (and we want the largest), or 2) there'll be no
        # more subgraph.
        return

    left_to_be_mapped = set()
    for nodes in to_be_mapped:
        for sgn in nodes:
            # We're going to remove sgn from to_be_mapped, but subject to
            # symmetry constraints. We know that for every constraint we
            # have those subgraph nodes are equal. So whenever we would
            # remove the lower part of a constraint, remove the higher
            # instead. This is all dealt with by _remove_node. And because
            # left_to_be_mapped is a set, we don't do double work.

            # And finally, make the subgraph one node smaller.
            # REDUCTION
            new_nodes = self._remove_node(sgn, nodes, constraints)
            left_to_be_mapped.add(new_nodes)
    # COMBINATION
    yield from self._largest_common_subgraph(
        candidates, constraints, to_be_mapped=left_to_be_mapped
    )
def prepare():
    """Prepare the training folder: tokenized data files, optional BPE, vocab files.

    Steps, in order:

    1. For every train/dev/test file (source and target language) read the
       raw file from ``preprocessing['source_folder']``, tokenize it in a
       process pool and write it to ``preprocessing['train_folder']``.
       The counters collected for the two train files are kept as vocabs.
    2. If ``preprocessing['use_bpe']`` is set, learn BPE joins from those
       vocabs, save them as JSON, and rewrite every data file with the joins
       applied (the un-joined intermediate files are removed afterwards).
    3. Write the ``vocab`` / ``vocab_unused`` files and a
       ``projector_config.pbtxt`` into the train log folder.

    Reads the module-level ``hparams`` and ``preprocessing`` mappings and
    rebinds the globals ``vocab`` and ``written_lines``.
    NOTE(review): ``append_vocab`` and ``write_lines`` are defined elsewhere;
    they presumably update those two globals - confirm.
    """
    global vocab, written_lines

    # Files to be prepared (key: file name relative to the train folder,
    # value: fraction of 'samples' to copy and an absolute cap)
    files = {
        '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['src']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': 1, 'up_to': -1},  # copy all of data (up to "samples")
        '{}.{}'.format(hparams['dev_prefix'].replace('.bpe', ''), hparams['src']).replace(preprocessing['train_folder'], '').lstrip('\\/'): {
            'amount': .1, 'up_to': preprocessing['test_size']},  # copy 1/10th but up to 'test_size'
        '{}.{}'.format(hparams['test_prefix'].replace('.bpe', ''), hparams['src']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': .1, 'up_to': preprocessing['test_size']},
        '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['tgt']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': 1, 'up_to': -1},
        '{}.{}'.format(hparams['dev_prefix'].replace('.bpe', ''), hparams['tgt']).replace(preprocessing['train_folder'], '').lstrip('\\/'): {
            'amount': .1, 'up_to': preprocessing['test_size']},
        '{}.{}'.format(hparams['test_prefix'].replace('.bpe', ''), hparams['tgt']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': .1, 'up_to': preprocessing['test_size']},
    }

    # pprint.pformat(files, indent=4)
    print(colorama.Fore.GREEN + "\nPreparing training set from raw set" + colorama.Fore.RESET)

    # Ensure that train folder exists
    try:
        os.makedirs(preprocessing['train_folder'])
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Ensure that model/log folder exists
    train_log_dir = os.path.join(hparams['out_dir'], 'train_log')
    try:
        os.makedirs(train_log_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    data_vocab = Counter()

    # Iterate through files and prepare them
    for file_name, amounts in files.items():

        vocab = Counter()

        print("File: {}{}{}".format(colorama.Fore.GREEN, file_name, colorama.Fore.RESET))

        # Output file handler
        # NOTE(review): this handle is never closed explicitly in this function.
        out_file = open('{}/{}'.format(preprocessing['train_folder'], file_name), 'w', encoding='utf-8',
                        buffering=131072)

        # Maximum number of lines (a non-positive 'samples' / 'up_to' means "no limit")
        read = 0
        amount = int(min(amounts['amount'] * preprocessing['samples'] if preprocessing['samples'] > 0 else 10 ** 20,
                         amounts['up_to'] if amounts['up_to'] > 0 else 10 ** 20))

        # Prepare thread variables
        write_thread = None
        vocab_thread = None
        written_lines = 0

        # We are going to use multiprocessing for tokenization, as it's cpu intensive
        with Pool(processes=preprocessing['cpu_count']) as pool:

            # Count number of lines in file (a full extra pass, only for the progress bar total)
            progress = tqdm(ascii=True, unit=' lines', total=min(amount, sum(1 for _ in open(
                '{}/{}'.format(preprocessing['source_folder'], file_name), 'r', encoding='utf-8', buffering=131072))))

            # Open input file
            with open('{}/{}'.format(preprocessing['source_folder'], file_name), 'r', encoding='utf-8',
                      buffering=131072) as in_file:

                last_batch = False

                # Iterate every 10k lines
                for rows in read_lines(in_file, 10000, ''):

                    # If number of lines is greater than limit - trim this batch and make it the last one
                    read += len(rows)
                    if read >= amount:
                        rows = rows[:amount - read + len(rows)]
                        last_batch = True

                    # Process using multiprocessing
                    rows = pool.map(tokenize, rows, 100)

                    # Process vocab using multiprocessing
                    vocab_part = pool.map(sentence_split, rows, 100)

                    # Join running threads from previous loop
                    if write_thread is not None:
                        write_thread.join()
                        vocab_thread.join()
                        progress.update(written_lines)

                    # Thread for vocab update
                    vocab_thread = Thread(target=append_vocab, args=(vocab_part,))
                    vocab_thread.start()

                    # And thread for saving tokenized data to output file
                    write_thread = Thread(target=write_lines, args=(out_file, rows, written_lines == 0))
                    write_thread.start()

                    # Last batch - break / exit loop
                    if last_batch:
                        break

                # Join running threads and update progress bar
                write_thread.join()
                vocab_thread.join()
                progress.update(written_lines)
                progress.close()

        # If it's train file, save vocab
        if file_name == '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['src']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'):
            data_vocab[hparams['src']] = vocab
        elif file_name == '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['tgt']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'):
            data_vocab[hparams['tgt']] = vocab

    # If joined vocab - add counters
    if preprocessing['joined_vocab']:
        data_vocab[hparams['src']] += data_vocab[hparams['tgt']]
        del data_vocab[hparams['tgt']]

    # BPE/WPM-like tokenization
    # inspired by and based on https://github.com/rsennrich/subword-nmt
    if preprocessing['use_bpe']:

        print(colorama.Fore.GREEN + "\nLearning BPE" + colorama.Fore.RESET)

        # List of subword joins to be applied to training data
        joins = {}

        # Final train vocab for NMT
        train_vocab = {}

        # Learn BPE for both vocabs (or common vocab)
        for source, raw_vocab in data_vocab.items():

            # Pair stats: (left, right) -> summed frequency
            stats = Counter()

            # Pair indexes: (left, right) -> {vocab index: number of occurrences in that entity}
            indices = defaultdict(lambda: defaultdict(int))

            # Build 'new' vocab used for BPE learning (train_vocab will be a final vocab for NMT)
            vocab = []
            train_vocab[source] = Counter()

            # Build vocab for BPE learning purpose
            print("Building temporary vocab ({})".format(hparams['src'] if preprocessing['joined_vocab'] else source))
            for i, (entity, freq) in tqdm(enumerate(raw_vocab.most_common()), ascii=True, unit=' tokens'):

                # Split vocab token
                entity = tuple(entity.split())

                # Make pairs ("ABCD" -> (A, B), (B, C), (C, D)), stats, indexes and train vocab
                prev_char = entity[0]
                train_vocab[source][prev_char] += freq
                for char in entity[1:]:
                    stats[prev_char, char] += freq
                    indices[prev_char, char][i] += 1
                    train_vocab[source][char] += freq
                    prev_char = char
                vocab.append((entity, freq))

            print("Learning BPE for vocab of {} tokens".format(preprocessing['vocab_size']))

            # List of joins per vocab
            joins[source] = []

            # Partial stats speeds up learning process - optimization for 'max' above
            # (seeded with two dummy entries; the first loop pass below always rebuilds it)
            partial_stats = Counter(['', -1])
            partial_stats_min = 0
            update_partial_stats = True

            # Current number of vocab tokens
            train_vocab_len = prev_train_vocab_len = len(train_vocab[source])

            # Progress bar
            progress = tqdm(ascii=True, unit=' tokens', total=preprocessing['vocab_size'], maxinterval=0.1,
                            miniters=10)
            progress.monitor_interval = 1
            progress.update(prev_train_vocab_len)

            # Learn until vocab will contain desired number of tokens
            while train_vocab_len < preprocessing['vocab_size']:

                clean_train_vocab = False

                # Get most frequent pair
                most_frequent, freq = partial_stats.most_common(1)[0]

                # Update partial stats if requested, or if the frequency of the most frequent pair
                # is not above the saved minimum for partial stats
                if update_partial_stats or freq <= partial_stats_min:
                    partial_stats_min = partial_stats.most_common(500)[-1][1]
                    partial_stats = Counter()
                    for k, v in stats.most_common():
                        if v < partial_stats_min:
                            break
                        partial_stats[k] = v
                    update_partial_stats = False

                    # Get most frequent pair (again, proper one this time)
                    most_frequent, _ = partial_stats.most_common(1)[0]

                # If frequency is lower than 2 - exit
                if stats[most_frequent] < 2:
                    print(
                        'No pair has frequency greater than 1. Stopping earlier, your vocab file will include less tokens.\n')
                    break

                # Replace pair "A B" with new entity "AB"

                # Changes made
                changes = []

                # Replace regex (matches the pair only on whitespace boundaries)
                pattern = re.compile(r'(?<!\S)' + re.escape(' '.join(most_frequent)) + r'(?!\S)')

                # Loop through indices
                for j, freq in indices[most_frequent].items():

                    # Do not touch not existent pairs
                    if freq < 1:
                        continue

                    # Get entity and frequency
                    entity, freq = vocab[j]

                    # Replace "A B" with "AB" in entity
                    new_entity = pattern.sub(''.join(most_frequent), ' '.join(entity))
                    new_entity = tuple(new_entity.split())

                    # Update entity
                    vocab[j] = (new_entity, freq)

                    changes.append((j, new_entity, entity, freq))

                # Update indices and pair stats
                # Merged pair doesn't exist anymore
                stats[most_frequent] = 0
                partial_stats[most_frequent] = 0
                indices[most_frequent] = defaultdict(int)

                # Get entities and a new pair
                first, second = most_frequent
                new_pair = first + second

                # Iterate through all changes
                for j, entity, old_entity, freq in changes:

                    # Find all occurrences of first pair entity
                    # ('prev' is reused: an int position here, a pair tuple inside the branches below)
                    prev = -2
                    for i in iter([i for i, entity in enumerate(old_entity) if entity == first]):

                        # Do not touch second "B B" if "B B B"
                        if i == prev + 1:
                            continue

                        # Check if second pair entity follows first one
                        if i < len(old_entity) - 1 and old_entity[i + 1] == second:

                            # Reduce frequency of "A B" in "A B C D" where "B C" is a merged pair
                            if i:
                                prev = old_entity[i - 1:i + 1]
                                stats[prev] -= freq
                                partial_stats[prev] = stats[prev]
                                indices[prev][j] -= 1

                            # Reduce frequency of "C D" in "A B C D" where "B C" is a merged pair
                            if i < len(old_entity) - 2:

                                # But do not touch "C B" if "A B C B C" as values will be adjusted with next occurrence of "B C" pair
                                if old_entity[i + 2] != first or i >= len(old_entity) - 3 or old_entity[i + 3] != second:
                                    next = old_entity[i + 1:i + 3]
                                    stats[next] -= freq
                                    partial_stats[next] = stats[next]
                                    indices[next][j] -= 1

                            # Back to an int position so the "i == prev + 1" check above keeps working
                            prev = i

                            if train_vocab[source][first] <= freq or train_vocab[source][second] <= freq:
                                clean_train_vocab = True
                            train_vocab[source][first] -= freq
                            train_vocab[source][second] -= freq

                    # Find all occurrences of the new (merged) entity
                    for i in [i for i, entity in enumerate(entity) if entity == new_pair]:

                        # Increase frequency of (new pair) "A BC" in "A BC D"
                        if i:
                            prev = entity[i - 1:i + 1]
                            stats[prev] += freq
                            if stats[prev] > partial_stats_min:
                                update_partial_stats = True
                            indices[prev][j] += 1

                        # Increase frequency of (new pair) "BC D" in "A BC D", but do not touch if "A BC BC"
                        # as stats for "BC BC" will be adjusted with next occurrence of "BC" pair
                        if i < len(entity) - 1 and entity[i + 1] != new_pair:
                            next = entity[i:i + 2]
                            stats[next] += freq
                            if stats[next] > partial_stats_min:
                                update_partial_stats = True
                            indices[next][j] += 1

                        # Set frequency of a new pair
                        train_vocab[source][new_pair] += freq

                # Current pair is merged - is not a pair anymore, so has frequency of 0
                stats[most_frequent] = 0
                partial_stats[most_frequent] = 0

                # Remove (from training vocab) tokens with frequency of 0
                # (unary plus on a Counter drops non-positive counts)
                if clean_train_vocab:
                    train_vocab[source] = +train_vocab[source]

                # Calculate current number of train vocab entities
                prev_train_vocab_len = train_vocab_len
                train_vocab_len = len(train_vocab[source])
                train_vocab_len_diff = train_vocab_len - prev_train_vocab_len

                # Update progress bar
                if train_vocab_len_diff >= 0:
                    progress.update(train_vocab_len_diff)
                # For a negative number set new value directly - tqdm doesn't support negative updates
                else:
                    progress.n += train_vocab_len_diff
                    progress.refresh()

                # Add new join pair
                joins[source].append(most_frequent)

            # Save list of joins for train vocab (pair -> position in learning order)
            joins[source] = dict(reversed([(v, i) for i, v in enumerate(joins[source])]))

            # Done
            progress.close()

        # Save list of joins to a file (joined vocab) and replace main vocabs
        if preprocessing['joined_vocab']:
            with open('{}/{}'.format(preprocessing['train_folder'], 'bpe_joins.common.json'), 'w', encoding='utf-8',
                      buffering=131072) as bpe_file:
                json.dump({json.dumps(k): v for k, v in joins[hparams['src']].items()}, bpe_file)
            data_vocab[hparams['src']] = train_vocab[hparams['src']]

        # Save list of joins to files (separated vocab)
        else:
            with open('{}/{}'.format(preprocessing['train_folder'], 'bpe_joins.{}.json'.format(hparams['src'])), 'w',
                      encoding='utf-8', buffering=131072) as bpe_file:
                json.dump({json.dumps(k): v for k, v in joins[hparams['src']].items()}, bpe_file)
            with open('{}/{}'.format(preprocessing['train_folder'], 'bpe_joins.{}.json'.format(hparams['tgt'])), 'w',
                      encoding='utf-8', buffering=131072) as bpe_file:
                json.dump({json.dumps(k): v for k, v in joins[hparams['tgt']].items()}, bpe_file)
            data_vocab[hparams['src']] = train_vocab[hparams['src']]
            data_vocab[hparams['tgt']] = train_vocab[hparams['tgt']]

        print(colorama.Fore.GREEN + "\nApplying BPE" + colorama.Fore.RESET)

        # BPE files to be prepared
        bpe_files = [
            '{}.{}'.format(hparams['train_prefix'], hparams['src']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['dev_prefix'], hparams['src']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['test_prefix'], hparams['src']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['train_prefix'], hparams['tgt']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['dev_prefix'], hparams['tgt']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['test_prefix'], hparams['tgt']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
        ]

        # Iterate through files and apply BPE
        for i, file_name in enumerate(bpe_files):

            # Current train vocab
            source = hparams['src'] if preprocessing['joined_vocab'] else file_name.split('.')[-1]

            print("File: {}{}{}".format(colorama.Fore.GREEN, file_name, colorama.Fore.RESET))

            # Output file handler
            # NOTE(review): this handle is never closed explicitly in this function.
            out_file = open('{}/{}'.format(preprocessing['train_folder'], file_name), 'w', encoding='utf-8',
                            buffering=131072)

            # Prepare thread variables
            write_thread = None
            written_lines = 0

            # We are going to use multiprocessing for joins, as it's cpu intensive
            with Pool(processes=preprocessing['cpu_count'], initializer=apply_bpe_init,
                      initargs=(joins[source],)) as pool:

                # Progress bar (input is the file written in step 1, i.e. the name without '.bpe')
                progress = tqdm(ascii=True, unit=' lines', total=sum(1 for _ in open(
                    '{}/{}'.format(preprocessing['train_folder'], file_name.replace('.bpe.', '.')), 'r',
                    encoding='utf-8', buffering=131072)))

                # Open input file
                with open('{}/{}'.format(preprocessing['train_folder'], file_name.replace('.bpe.', '.')), 'r',
                          encoding='utf-8', buffering=131072) as in_file:

                    # Iterate every 10k lines
                    for rows in read_lines(in_file, 10000, ''):

                        # Process using multiprocessing
                        rows = pool.map(apply_bpe, rows, 100)

                        # Join running threads from previous loop
                        if write_thread is not None:
                            write_thread.join()
                            # vocab_thread.join()
                            # print('+')
                            progress.update(written_lines)
                            # vocab_thread2.join()

                        # Thread for saving tokenized data to output BPE file
                        write_thread = Thread(target=write_lines, args=(out_file, rows, written_lines == 0))
                        write_thread.start()

                    # Join running threads and update progress bar
                    write_thread.join()
                    progress.update(written_lines)
                    progress.close()

            # Remove unnecessary train file (BPE one will be used by NMT)
            os.remove('{}/{}'.format(preprocessing['train_folder'], file_name.replace('.bpe.', '.')))

    print(colorama.Fore.GREEN + "\nPostprocessing and saving vocabs" + colorama.Fore.RESET)

    # Vocab files to be prepared

    # Joined vocab
    if preprocessing['joined_vocab']:
        vocab_files = [
            '{}.{}'.format(hparams['train_prefix'].replace('train', 'vocab'), hparams['src']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'),
        ]

    # Separated vocabs
    else:
        vocab_files = [
            '{}.{}'.format(hparams['train_prefix'].replace('train', 'vocab'), hparams['src']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'),
            '{}.{}'.format(hparams['train_prefix'].replace('train', 'vocab'), hparams['tgt']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'),
        ]

    for vocab_file_name in vocab_files:
        print("File: {}{}{}".format(colorama.Fore.GREEN, vocab_file_name, colorama.Fore.RESET))

        # Get most common entities (the vocab for a language is looked up by the file extension)
        source = vocab_file_name.split('.')[-1]
        data_vocab[source] = [entity for entity, _ in data_vocab[source].most_common()]

        # Write entities to a file: the first 'vocab_size' go to the vocab, the rest to 'vocab_unused'
        with open('{}/{}'.format(preprocessing['train_folder'], vocab_file_name), 'w', encoding='utf-8',
                  buffering=131072) as vocab_file:
            vocab_file.write("<unk>\n<s>\n</s>\n" + "\n".join(data_vocab[source][:preprocessing['vocab_size']]))
        with open('{}/{}'.format(preprocessing['train_folder'], vocab_file_name.replace('vocab', 'vocab_unused')), 'w',
                  encoding='utf-8', buffering=131072) as vocab_file:
            vocab_file.write("\n".join(data_vocab[source][preprocessing['vocab_size']:]))

    print(colorama.Fore.GREEN + "\nWriting pbtxt file" + colorama.Fore.RESET)

    # Write pbtxt file for metadata for embeddings
    with open('{}/{}'.format(os.path.join(train_log_dir), 'projector_config.pbtxt'), 'w', encoding='utf-8',
              buffering=131072) as pbtxt_file:
        pbtxt_file.write(('''embeddings {{\n tensor_name: 'embeddings/decoder/embedding_decoder'\n ''' + '''metadata_path: '{}'\n}}\nembeddings {{\n ''' + '''tensor_name: 'embeddings/encoder/embedding_encoder'\n metadata_path: '{}'\n}}''').format(
            '{}/{}'.format(preprocessing['train_folder'], vocab_files[0].replace('train', 'vocab')),
            '{}/{}'.format(preprocessing['train_folder'],
                           vocab_files[0 if preprocessing['joined_vocab'] else 1].replace('train', 'vocab'))
        ))

    print(colorama.Fore.GREEN + "\nAll done" + colorama.Fore.RESET)
def dict_mp(*args):
    """Build a dict from alternating key/value positional arguments.

    Used as a dict compatible with multiprocess environment. A trailing
    key without a value is ignored.
    """
    keys = args[::2]
    values = args[1::2]
    return dict(zip(keys, values))
def nsigtf_sl(cseq, gd, wins, nn, Ls=None, real=False, reducedform=0, measurefft=False, multithreading=False):
    """Generator: synthesize one signal slice for every coefficient set in ``cseq``.

    Parameters
    ----------
    cseq : iterable
        Coefficient sets, one per slice; each must have the expected number
        of frequency channels (checked by an assertion).
    gd : sequence of arrays
        Synthesis windows; ``gd[0].dtype`` selects the FFT dtype.
    wins : sequence
        Index ranges belonging to the windows in ``gd``.
    nn : int
        Length of the frequency-domain buffer and of the inverse transform.
    Ls : int, optional
        If given, every output slice is truncated to this length.
    real, reducedform, measurefft, multithreading
        Select the real-signal variant, the reduced channel layout, FFT
        planning, and the use of ``MP.Pool().map`` respectively.

    Yields
    ------
    One synthesized slice per element of ``cseq``. Nothing is yielded when
    ``cseq`` is empty.
    """
    cseq = iter(cseq)
    dtype = gd[0].dtype

    fft = fftp(measure=measurefft, dtype=dtype)
    ifft = irfftp(measure=measurefft, dtype=dtype) if real else ifftp(measure=measurefft, dtype=dtype)

    if real:
        ln = len(gd)//2+1-reducedform*2
        fftsymm = lambda c: np.hstack((c[0],c[-1:0:-1])).conj()
        if reducedform:
            # no coefficients for f=0 and f=fs/2
            symm = lambda fc: chain(fc, imap(fftsymm,fc[::-1]))
            sl = lambda x: chain(x[reducedform:len(gd)//2+1-reducedform],x[len(gd)//2+reducedform:len(gd)+1-reducedform])
        else:
            symm = lambda fc: chain(fc,imap(fftsymm,fc[-2:0:-1]))
            sl = lambda x: x
    else:
        ln = len(gd)
        symm = lambda fc: fc
        sl = lambda x: x

    maxLg = max(len(gdii) for gdii in sl(gd))

    # get first slice
    # next() works on both Python 2 and 3 iterators (cseq.next() is Python 2 only).
    # An empty input simply ends the generator; letting StopIteration escape
    # would be turned into a RuntimeError by PEP 479.
    try:
        c0 = next(cseq)
    except StopIteration:
        return

    fr = np.empty(nn, dtype=c0[0].dtype)  # Allocate output
    temp0 = np.empty(maxLg, dtype=fr.dtype)  # pre-allocation

    if multithreading and MP is not None:
        mmap = MP.Pool().map
    else:
        mmap = map

    # Per-window parameters are independent of the slice, so build them once
    loopparams = []
    for gdii,win_range in izip(sl(gd), sl(wins)):
        Lg = len(gdii)
        temp = temp0[:Lg]
        wr1 = win_range[:(Lg)//2]
        wr2 = win_range[-((Lg+1)//2):]
        # wr1,wr2 = win_range
        sl1 = slice(None, (Lg+1)//2)
        sl2 = slice(-(Lg//2), None)
        p = (gdii,wr1,wr2,sl1,sl2,temp)
        loopparams.append(p)

    # main loop over slices
    for c in chain((c0,),cseq):
        assert len(c) == ln

        # do transforms on coefficients
        # TODO: for matrixform we could do a FFT on the whole matrix along one axis
        # this could also be nicely parallelized
        # symm() slices its argument, so a lazy map object must be materialized first
        fc = list(mmap(fft, c))
        fc = symm(fc)

        # The overlap-add procedure including multiplication with the synthesis windows
        fr = nsigtf_loop(loopparams, fr, fc)

        ftr = fr[:nn//2+1] if real else fr

        sig = ifft(ftr, outn=nn)

        sig = sig[:Ls]  # Truncate the signal to original length (if given)

        yield sig
def __iter__(self):
    """Return an iterator over ``self.needed``."""
    needed = self.needed
    return iter(needed)
def __iter__(self):
    """Return an iterator over ``self.indices``."""
    indices = self.indices
    return iter(indices)
viewpoints = viewpoints.view((-1, m, *v_dims)) # Partition into context and query sets context_idx, query_idx = indices[:-1], indices[-1] x, v = images[:, context_idx], viewpoints[:, context_idx] x_q, v_q = images[:, query_idx], viewpoints[:, query_idx] return x, v, x_q, v_q import random # Pick a scene to visualise scene_id = 3 # Load data x, v = next(iter(loader)) x_, v_ = x.squeeze(0), v.squeeze(0) # Sample a set of views n_context = 13 + 1 indices = random.sample([i for i in range(v_.size(1))], n_context) # Seperate into context and query sets x_c, v_c, x_q, v_q = deterministic_partition(x, v, indices) # Visualise context and query images f, axarr = plt.subplots(1, 15, figsize=(20, 7)) for i, ax in enumerate(axarr.flat): # Move channel dimension to end ax.imshow(x_[scene_id][i].permute(1, 2, 0))
def __iter__(self):
    """Return an iterator over whatever ``self.keys()`` returns."""
    keys = self.keys()
    return iter(keys)
def default(cls):
    """Return the first item obtained by iterating ``cls``, or None if it is empty."""
    return next(iter(cls), None)
def test_build_dataset(dataset):
    """The first item drawn from the built dataset must have three components."""
    colors, words, vocabulary = dataset
    describer = ContextualColorDescriber(vocabulary)
    built = describer.build_dataset(colors, words)
    first_item = next(iter(built))
    assert len(first_item) == 3
def test(arg=None):
    """Exercise the ``Pool`` API: map/apply/imap and their async variants.

    Parameters
    ----------
    arg : str, optional
        Pass ``"-v"`` to print progress messages; any other value keeps the
        run silent.

    Raises
    ------
    AssertionError
        If any of the checks fails. The pool is terminated and joined in all
        cases, so a failed check does not leave worker threads behind.
    """
    if arg == "-v":
        def say(*x):
            print(*x)
    else:
        def say(*x):
            pass

    say("Start Pool testing")

    get_tid = lambda: threading.current_thread().ident

    def return42():
        return 42

    def f(x):
        return x * x

    def work(mseconds):
        # Sleeps for |mseconds| hundredths of a second and returns the
        # original argument as a string.
        res = str(mseconds)
        if mseconds < 0:
            mseconds = -mseconds
        say("[%d] Start to work for %fms..." % (get_tid(), mseconds * 10))
        time.sleep(mseconds / 100.)
        say("[%d] Work done (%fms)." % (get_tid(), mseconds * 10))
        return res

    ### Test copy/pasted from multiprocessing
    pool = Pool(4)  # start worker threads

    try:
        # edge cases
        assert pool.map(return42, []) == []
        assert pool.apply_async(return42, []).get() == 42
        assert pool.apply(return42, []) == 42
        assert list(pool.imap(return42, iter([]))) == []
        assert list(pool.imap_unordered(return42, iter([]))) == []
        assert pool.map_async(return42, []).get() == []
        assert list(pool.imap_async(return42, iter([])).get()) == []
        assert list(pool.imap_unordered_async(return42, iter([])).get()) == []

        # basic tests
        result = pool.apply_async(f, (10, ))  # evaluate "f(10)" asynchronously
        assert result.get(timeout=1) == 100  # ... unless slow computer
        assert list(pool.map(f, range(10))) == list(map(f, range(10)))
        it = pool.imap(f, range(10))
        assert next(it) == 0
        assert next(it) == 1
        assert next(it) == 4

        # Test apply_sync exceptions
        result = pool.apply_async(time.sleep, (3, ))
        try:
            say(result.get(timeout=1))  # raises `TimeoutError`
        except TimeoutError:
            say("Good. Got expected timeout exception.")
        else:
            assert False, "Expected exception !"
        assert result.get() is None  # sleep() returns None

        def cb(s):
            say("Result ready: %s" % s)

        # Test imap()
        assert list(pool.imap(work, range(10, 3, -1), chunksize=4)) == list(map(str, range(10, 3, -1)))

        # Test imap_unordered()
        assert sorted(pool.imap_unordered(work, range(10, 3, -1))) == sorted(
            map(str, range(10, 3, -1)))

        # Test map_async()
        result = pool.map_async(work, range(10), callback=cb)
        try:
            result.get(timeout=0.01)  # raises `TimeoutError`
        except TimeoutError:
            say("Good. Got expected timeout exception.")
        else:
            assert False, "Expected exception !"
        say(result.get())

        # Test imap_async()
        result = pool.imap_async(work, range(3, 10), callback=cb)
        try:
            result.get(timeout=0.01)  # raises `TimeoutError`
        except TimeoutError:
            say("Good. Got expected timeout exception.")
        else:
            assert False, "Expected exception !"
        for i in result.get():
            say("Item:", i)
        say("### Loop again:")
        for i in result.get():
            say("Item2:", i)

        # Test imap_unordered_async()
        result = pool.imap_unordered_async(work, range(10, 3, -1), callback=cb)
        try:
            say(result.get(timeout=0.01))  # raises `TimeoutError`
        except TimeoutError:
            say("Good. Got expected timeout exception.")
        else:
            assert False, "Expected exception !"
        for i in result.get():
            say("Item1:", i)
        for i in result.get():
            say("Item2:", i)
        r = result.get()
        for i in r:
            say("Item3:", i)
        for i in r:
            say("Item4:", i)
        for i in r:
            say("Item5:", i)

        #
        # The case for the exceptions
        #

        # Exceptions in imap_unordered_async()
        result = pool.imap_unordered_async(work, range(2, -10, -1), callback=cb)
        time.sleep(3)
        try:
            for i in result.get():
                say("Got item:", i)
        except (IOError, ValueError):
            say("Good. Got expected exception")

        # Exceptions in imap_async()
        result = pool.imap_async(work, range(2, -10, -1), callback=cb)
        time.sleep(3)
        try:
            for i in result.get():
                say("Got item:", i)
        except (IOError, ValueError):
            say("Good. Got expected exception")
    finally:
        # Stop the test: need to stop the pool !!!
        # Done in `finally` so the workers are shut down even when a check fails.
        pool.terminate()
        pool.join()
def __iter__(self):
    """Return an iterator that yields nothing."""
    empty = ()
    return iter(empty)
def setUp(self):
    """Runs before each test: make sure the LANG environment variable is unset."""
    if 'LANG' in os.environ:
        del os.environ['LANG']
def __iter__(self):
    """Return an iterator over ``self.all``."""
    everything = self.all
    return iter(everything)
def __iter__(self):
    """Iterate over the NumPy array view/copy produced by ``np.asarray(self)``."""
    as_array = np.asarray(self)
    return iter(as_array)
def tearDown(self):
    """Runs after each test: make sure the LANG environment variable is unset."""
    if 'LANG' in os.environ:
        del os.environ['LANG']
# Load best model and evaluate it on a fixed number of test samples.
datagenTest = ImageDataGenerator()
datagenTest.config['random_crop_size'] = image_size
datagenTest.set_pipeline([random_crop,standardize,compute_fft2])
flow_test = datagenTest.flow_from_directory(test_clean_dir,batch_size=50,color_mode='rgbfft',target_size=image_size)
flow_test.setCurrentISO(SELECTED_ISO, test_noisy_dir)
flow_test.setGANdir(test_GAN_dir)
flow_test.batch_size = 3#batchsizes_for_isos[str(ISO_LEVEL)]

total_batch_size = 500
x = np.zeros((total_batch_size,image_size[0],image_size[1],6))
y_true = np.zeros((total_batch_size,7))

batch_size = flow_test.batch_size
n_batches = total_batch_size // batch_size
# Only whole batches are fetched, so this many rows actually receive data.
n_filled = n_batches * batch_size

iter_flow = iter(flow_test)
for i in range(n_batches):
    start = i * batch_size
    # Restart the iterator whenever the sample offset is an exact multiple of
    # the number of files.
    # NOTE(review): if len(flow_test.filenames) is not a multiple of
    # batch_size this only fires for i == 0 - confirm that is intended.
    if start % len(flow_test.filenames) == 0:
        flow_test.on_epoch_end()
        iter_flow = iter(flow_test)
    x_cur,y_cur = next(iter_flow)
    x[start:start + batch_size] = x_cur
    y_true[start:start + batch_size] = y_cur

# total_batch_size is not a multiple of batch_size, so the trailing rows were
# never filled. Drop them; otherwise all-zero inputs would be predicted and
# scored against class 0 (argmax of an all-zero label row).
x = x[:n_filled]
y_true = y_true[:n_filled]

model = create_model(image_size, num_classes=num_classes)
model.load_weights('trained_models/{}_SIDD_several_classes_weights.h5'.format(SELECTED_ISO))
y_pred = model.predict(x)

# Convert one-hot / score vectors to class indices
y_true = y_true.argmax(axis=1)
y_pred = y_pred.argmax(axis=1)
def main():
    """Train the model selected by ``--task`` and validate it after every epoch.

    Outline, as visible in this function:

    * parse arguments, set up distributed mode, logging and signal handling;
    * load the Table-BERT config from ``<data_dir>/config.json`` and, on the
      master process, dump the run arguments and the config to ``output_dir``;
    * build the model (optionally fp16 / DistributedDataParallel) and a
      ``Trainer``, resuming from ``output_dir/model.ckpt.bin`` if it exists;
    * for every epoch: build the epoch dataset under a fixed RNG seed, run
      the training steps with periodic checkpoints, save the model weights
      (master only) and run validation on the dev set.

    Raises
    ------
    ValueError
        If ``gradient_accumulation_steps`` is smaller than 1.
    NotImplementedError
        If ``args.no_init`` is set.
    """
    args = parse_train_arg()
    task = task_dict[args.task]

    init_distributed_mode(args)
    logger = init_logger(args)

    if hasattr(args, 'base_model_name'):
        logger.warning(
            'Argument base_model_name is deprecated! Use `--table-bert-extra-config` instead!'
        )

    init_signal_handler()

    train_data_dir = args.data_dir / 'train'
    dev_data_dir = args.data_dir / 'dev'
    table_bert_config = task['config'].from_file(
        args.data_dir / 'config.json', **args.table_bert_extra_config)

    if args.is_master:
        args.output_dir.mkdir(exist_ok=True, parents=True)
        with (args.output_dir / 'train_config.json').open('w') as f:
            json.dump(vars(args), f, indent=2, sort_keys=True, default=str)

        logger.info(f'Table Bert Config: {table_bert_config.to_log_string()}')

        # copy the table bert config file to the working directory
        # shutil.copy(args.data_dir / 'config.json', args.output_dir / 'tb_config.json')
        # save table BERT config
        table_bert_config.save(args.output_dir / 'tb_config.json')

    # NOTE(review): this check runs after config.json has already been read
    # from data_dir above.
    assert args.data_dir.is_dir(), \
        "--data_dir should point to the folder of files made by pregenerate_training_data.py!"

    if args.cpu:
        device = torch.device('cpu')
    else:
        device = torch.device(f'cuda:{torch.cuda.current_device()}')

    logger.info(
        "device: {} gpu_id: {}, distributed training: {}, 16-bits training: {}"
        .format(device, args.local_rank, bool(args.multi_gpu), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    real_batch_size = args.train_batch_size  # // args.gradient_accumulation_steps

    # Seed every RNG in use
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if not args.cpu:
        torch.cuda.manual_seed_all(args.seed)

    if args.output_dir.is_dir() and list(args.output_dir.iterdir()):
        logger.warning(
            f"Output directory ({args.output_dir}) already exists and is not empty!"
        )
    args.output_dir.mkdir(parents=True, exist_ok=True)

    # Prepare model
    # The two barriers below make every rank other than 0 wait until rank 0
    # has constructed the model.
    if args.multi_gpu and args.global_rank != 0:
        torch.distributed.barrier()

    if args.no_init:
        raise NotImplementedError
    else:
        model = task['model'](table_bert_config)

    if args.multi_gpu and args.global_rank == 0:
        torch.distributed.barrier()

    if args.fp16:
        model = model.half()

    model = model.to(device)
    if args.multi_gpu:
        if args.ddp_backend == 'pytorch':
            model = nn.parallel.DistributedDataParallel(
                model,
                find_unused_parameters=True,
                device_ids=[args.local_rank],
                output_device=args.local_rank,
                broadcast_buffers=False)
        else:
            import apex
            model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)

        # model_ptr always refers to the unwrapped model
        model_ptr = model.module
    else:
        model_ptr = model

    # set up update parameters for LR scheduler
    dataset_cls = task['dataset']

    train_set_info = dataset_cls.get_dataset_info(train_data_dir, args.max_epoch)
    total_num_updates = train_set_info[
        'total_size'] // args.train_batch_size // args.world_size // args.gradient_accumulation_steps
    args.max_epoch = train_set_info['max_epoch']
    logger.info(
        f'Train data size: {train_set_info["total_size"]} for {args.max_epoch} epochs, total num. updates: {total_num_updates}'
    )

    args.total_num_update = total_num_updates
    # 10% of all updates are used for warmup
    args.warmup_updates = int(total_num_updates * 0.1)

    trainer = Trainer(model, args)

    checkpoint_file = args.output_dir / 'model.ckpt.bin'
    is_resumed = False  # set below but not read again in this function
    # trainer.save_checkpoint(checkpoint_file)
    if checkpoint_file.exists():
        logger.info(f'Logging checkpoint file {checkpoint_file}')
        is_resumed = True
        trainer.load_checkpoint(checkpoint_file)

    model.train()

    # we also partition the dev set for every local process
    logger.info('Loading dev set...')
    sys.stdout.flush()
    dev_set = dataset_cls(epoch=0,
                          training_path=dev_data_dir,
                          tokenizer=model_ptr.tokenizer,
                          config=table_bert_config,
                          multi_gpu=args.multi_gpu,
                          debug=args.debug_dataset)

    logger.info("***** Running training *****")
    logger.info(f" Current config: {args}")

    if trainer.num_updates > 0:
        logger.info(f'Resume training at epoch {trainer.epoch}, '
                    f'epoch step {trainer.in_epoch_step}, '
                    f'global step {trainer.num_updates}')

    start_epoch = trainer.epoch
    for epoch in range(start_epoch, args.max_epoch):  # inclusive
        model.train()

        # Build the epoch's data under a forked RNG seeded from the epoch
        # number, leaving the global RNG state untouched afterwards.
        with torch.random.fork_rng(
                devices=None if args.cpu else [device.index]):
            torch.random.manual_seed(131 + epoch)

            epoch_dataset = dataset_cls(epoch=trainer.epoch,
                                        training_path=train_data_dir,
                                        config=table_bert_config,
                                        tokenizer=model_ptr.tokenizer,
                                        multi_gpu=args.multi_gpu,
                                        debug=args.debug_dataset)
            train_sampler = RandomSampler(epoch_dataset)
            train_dataloader = DataLoader(epoch_dataset,
                                          sampler=train_sampler,
                                          batch_size=real_batch_size,
                                          num_workers=0,
                                          collate_fn=epoch_dataset.collate)

        # Group batches so that one item covers a full gradient-accumulation step
        samples_iter = GroupedIterator(iter(train_dataloader),
                                       args.gradient_accumulation_steps)
        trainer.resume_batch_loader(samples_iter)

        with tqdm(total=len(samples_iter),
                  initial=trainer.in_epoch_step,
                  desc=f"Epoch {epoch}",
                  file=sys.stdout,
                  disable=not args.is_master,
                  miniters=100) as pbar:

            for samples in samples_iter:
                logging_output = trainer.train_step(samples)

                pbar.update(1)
                pbar.set_postfix_str(', '.join(
                    f"{k}: {v:.4f}" for k, v in logging_output.items()))

                if (0 < trainer.num_updates
                        and trainer.num_updates % args.save_checkpoint_every_niter == 0
                        and args.is_master):
                    # Save model checkpoint
                    logger.info("** ** * Saving checkpoint file ** ** * ")
                    trainer.save_checkpoint(checkpoint_file)

            logger.info(f'Epoch {epoch} finished.')

            if args.is_master:
                # Save a trained table_bert
                logger.info("** ** * Saving fine-tuned table_bert ** ** * ")
                model_to_save = model_ptr  # Only save the table_bert it-self
                output_model_file = args.output_dir / f"pytorch_model_epoch{epoch:02d}.bin"
                torch.save(model_to_save.state_dict(), str(output_model_file))

            # perform validation
            logger.info("** ** * Perform validation ** ** * ")
            dev_results = trainer.validate(dev_set)

            if args.is_master:
                logger.info('** ** * Validation Results ** ** * ')
                logger.info(f'Epoch {epoch} Validation Results: {dev_results}')

            # flush logging information to disk
            sys.stderr.flush()

        trainer.next_epoch()
# Which of the sample objects are iterable?
print("list_is_iterable:",isinstance(list_,Iterable))
print("dict_is_iterable:",isinstance(dict_,Iterable))
print("str_is_iterable:",isinstance(str_,Iterable))
print("list_generator_is_iteratable",isinstance((x for x in range(10)), Iterable))

# Check whether each object is an iterator
print("是否为迭代器")
# The ABCs live in collections.abc; the old alias in `collections` was
# removed in Python 3.10.
try:
    from collections.abc import Iterator
except ImportError:  # Python 2
    from collections import Iterator
print("list_is_iterator:",isinstance(list_,Iterator))
print("dict_is_iterator:",isinstance(dict_,Iterator))
print("str_is_iterator:",isinstance(str_,Iterator))
print("list_generator_is_iterator",isinstance((x for x in range(10)), Iterator))

# iter(): turn the iterables into iterators
print("使用iter使之成为迭代器")
print("list_iter_is_iterator:",isinstance(iter(list_),Iterator))
print("dict_iter_is_iterator:",isinstance(iter(dict_),Iterator))
print("str_iter_is_iterator:",isinstance(iter(str_),Iterator))

# The bare string below is the author's note (in Chinese). In short: an
# iterator does not store its data. It represents a stream that next()
# advances until StopIteration is raised; its length cannot be known in
# advance, so values are computed lazily, only when requested.
''' iterator并不存储 这是因为Python的Iterator对象表示的是一个数据流, Iterator对象可以被next()函数调用并不断返回下一个数据, 直到没有数据时抛出StopIteration错误。 可以把这个数据流看做是一个有序序列,但我们却不能提前知道序列的长度, 只能不断通过next()函数实现按需计算下一个数据, 所以Iterator的计算是惰性的,只有在需要返回下一个数据时它才会计算。 '''

# Modifying a list while iterating over it
list_test = [1,2,3,4,5]
def __iter__(self) -> Iterator[PackFile]:
    """Iterate over the values stored in ``self._files``."""
    pack_files = self._files.values()
    return iter(pack_files)