Esempi in Python per iter

Esempio n. 1

0

Mostra file

File: Layout.py Progetto: Black-Cog/Anvil

	def add( self, added=None ):
		"""Lay out one horizontal line of items below the lines added so far.

		added -- a single item or an iterable of items. A value that cannot be
		         iterated is treated as a line holding just that one item.

		Every item is re-parented to self.layout, made visible and placed left
		to right starting at self.__margin[1], keeping its current width and
		height. Afterwards self.linesH grows by the tallest item plus 5,
		self.linesW is widened if this line is the widest so far, and the
		layout is resized when scrolling is enabled.
		"""
		# Accept a bare item as well as a collection of items.
		try :
			iter( added )
		except TypeError :
			added = [ added ]

		offsetX = self.__margin[1]
		offsetY = 0
		for item in added :
			item.setParent( self.layout )
			item.setVisible( True )

			# Only the size is reused; the previous position is discarded.
			geometry = item.geometry()
			w = geometry.width()
			h = geometry.height()

			item.setGeometry( offsetX, self.linesH, w, h )

			# increment line offset horizontal (item width plus a gap of 10)
			offsetX += w + 10

			# the tallest item defines the height of this line
			if offsetY < h : offsetY = h

		# increment line offset height and width
		self.linesH += offsetY + 5
		if self.linesW < offsetX:
			self.linesW = offsetX

		# resize the layout in the case of a scroll
		if self.__scroll : self.layout.resize( self.linesW+self.__margin[0], self.linesH+self.__margin[1] )

Esempio n. 2

0

Mostra file

File: batch_htseq_count.py Progetto: Christian-B/grid_scripts

def open_sam(sam_filename, samtype):
    """Open a SAM/BAM source and yield ``(pe_mode, read_seq)`` exactly once.

    sam_filename -- path of the input file, or "-" to read from standard input.
    samtype      -- "sam" or "bam"; selects the HTSeq reader class.

    The first read is consumed to learn whether the data is paired-end. When
    reading from standard input that read is chained back in front of the
    remaining reads so the caller still sees every record.

    Raises ValueError for an unknown ``samtype``. Any error raised while
    reading the first record, or raised into the generator at the ``yield``,
    is reported on stderr and re-raised.
    """
    if samtype == "sam":
        SAM_or_BAM_Reader = HTSeq.SAM_Reader
    elif samtype == "bam":
        SAM_or_BAM_Reader = HTSeq.BAM_Reader
    else:
        raise ValueError("Unknown input format %s specified." % samtype)

    try:
        if sam_filename != "-":
            read_seq_file = SAM_or_BAM_Reader(sam_filename)
            read_seq = read_seq_file
            # The built-in next() works on Python 2.6+ and 3, unlike .next().
            first_read = next(iter(read_seq))
        else:
            read_seq_file = SAM_or_BAM_Reader(sys.stdin)
            read_seq_iter = iter(read_seq_file)
            first_read = next(read_seq_iter)
            # stdin cannot be re-read, so put the peeked read back in front.
            read_seq = itertools.chain([first_read], read_seq_iter)
        pe_mode = first_read.paired_end
    except Exception:
        msg = "Error occured when reading beginning of SAM/BAM file.\n"
        sys.stderr.write(msg)
        raise

    try:
        yield (pe_mode, read_seq)
    except Exception:
        # Exception (not a bare except) so that closing the generator
        # (GeneratorExit) is not reported as a processing error.
        sys.stderr.write("Error occured when processing SAM input (%s):\n" %
                         read_seq_file.get_line_number_string())
        raise

Esempio n. 3

0

Mostra file

File: test_functional.py Progetto: tayfun/celery

    def test_gen(self):
        """Index into ``regen`` objects wrapping one-shot iterators and check ``.data``."""
        # Fresh wrapper: indices are requested in descending order first.
        g = regen(iter(list(range(10))))
        assert g[7] == 7
        assert g[6] == 6
        assert g[5] == 5
        assert g[4] == 4
        assert g[3] == 3
        assert g[2] == 2
        assert g[1] == 1
        assert g[0] == 0
        # NOTE(review): the comma makes ``list(range(10))`` the assertion
        # message, so this line only checks that ``g.data`` is truthy. An
        # equality check may have been intended -- TODO confirm.
        assert g.data, list(range(10))
        assert g[8] == 8
        assert g[0] == 0
        # Fresh wrapper: after two low indices ``.data`` holds all ten items.
        g = regen(iter(list(range(10))))
        assert g[0] == 0
        assert g[1] == 1
        assert g.data == list(range(10))
        # Single-element source: indexing past the end raises IndexError.
        g = regen(iter([1]))
        assert g[0] == 1
        with pytest.raises(IndexError):
            g[1]
        assert g.data == [1]

        # Negative indices count from the end of the wrapped sequence.
        g = regen(iter(list(range(10))))
        assert g[-1] == 9
        assert g[-2] == 8
        assert g[-3] == 7
        assert g[-4] == 6
        assert g[-5] == 5
        assert g[5] == 5
        assert g.data == list(range(10))

        # Iterating the wrapper after indexing still yields every item.
        assert list(iter(g)) == list(range(10))

Esempio n. 4

0

Mostra file

File: test_origin.py Progetto: dhersam/sos

    def test_admin_setup(self):
        """PUT /origin/.prep answers 204 when every backend call succeeds and raises otherwise."""
        def prep_request():
            # Build a fresh admin PUT request for the .prep endpoint.
            return Request.blank('/origin/.prep',
                environ={'REQUEST_METHOD': 'PUT'},
                headers={'X-Origin-Admin-User': '******',
                         'X-Origin-Admin-Key': 'unittest'})

        # Queue 102 successful responses; 101 calls are expected to be made.
        # NOTE(review): the comment previously here read "PUTs for account
        # and 16 .hash's", which does not match these counts -- TODO confirm.
        self.test_origin.app = FakeApp(iter(
           [('204 No Content', {}, '') for _ in range(102)]))
        resp = prep_request().get_response(self.test_origin)
        self.assertEqual(resp.status_int, 204)
        self.assertEqual(self.test_origin.app.calls, 101)

        # A 404 from the backend, on the first call or a later one, must
        # surface as an exception from get_response.
        failing_sequences = (
            [('404 Not Found', {}, '')],
            [('204 No Content', {}, ''), ('404 Not Found', {}, '')],
        )
        for responses in failing_sequences:
            self.test_origin.app = FakeApp(iter(responses))
            req = prep_request()
            self.assertRaises(Exception, req.get_response, self.test_origin)

Esempio n. 5

0

Mostra file

File: test_bison_lalr.py Progetto: o11c/lr-parsers

def test_bison_lalr_repr_automaton_lr0():
    """Check the repr() of the automaton built for the minimal LR(0) example grammar."""
    def expected(template):
        # The literals use '•' and '∥' as placeholders for the marker strings.
        return template.strip().replace('•', _mdot).replace('∥', _parallel)

    def first_value(mapping):
        # First value of a mapping, in its iteration order.
        return next(iter(mapping.values()))

    automaton = compute_automaton(grammar_examples.lr0.ex_minimal1.grammar)
    assert repr(automaton) == '<Automaton with 4 states>'

    all_states_repr = expected('''
[<StateData #0 with 1 actions, 1 gotos
  <bison.ItemSet #0, size 2
    < $accept → • Root $eof ∥ >
    < Root → • term ∥ >
>>, <StateData #1 with 1 actions, 0 gotos
  <bison.ItemSet #1, size 1
    < Root → term • ∥ { $eof } >
>>, <StateData #2 with 1 actions, 0 gotos
  <bison.ItemSet #2, size 1
    < $accept → Root • $eof ∥ >
>>, <StateData #3 with 2 actions, 0 gotos
  <bison.ItemSet #3, size 1
    < $accept → Root $eof • ∥ >
>>]
    ''')
    assert repr(automaton._data) == all_states_repr

    state_id_repr = expected('''
<StateId for <StateData #0 with 1 actions, 1 gotos
  <bison.ItemSet #0, size 2
    < $accept → • Root $eof ∥ >
    < Root → • term ∥ >
>>>
''')
    assert repr(automaton._data[0]._id) == state_id_repr

    # Spot-check one action or goto per state.
    assert repr(first_value(automaton._data[0]._actions)) == 'Shift(<state 1>)'
    assert repr(first_value(automaton._data[1]._actions)) == 'Reduce(<rule 1>)'
    assert repr(first_value(automaton._data[0]._gotos)) == 'Goto(<state 2>)'

Esempio n. 6

0

Mostra file

File: jsonpickle_test.py Progetto: achernet/jsonpickle

 def __reduce__(self):
     """Return a five-item reduce tuple whose list items are 'foo' and 'bar'."""
     factory = PickleProtocol2ReduceListitemsAppend  # callable
     factory_args = ()  # args
     state = {}  # state
     list_items = iter(['foo', 'bar'])  # listitems
     dict_items = iter([])  # dictitems
     return (factory, factory_args, state, list_items, dict_items)

Esempio n. 7

0

Mostra file

File: analysismap.py Progetto: autosportlabs/RaceCapture_App

    def select_map(self, latitude, longitude):
        """
        Pick and show a track close to the given coordinates.

        A nearby track that is also among the saved track selections wins;
        otherwise the first nearby track is used.

        :param latitude: latitude of the point to search around
        :type latitude: float
        :param longitude: longitude of the point to search around
        :type longitude: float
        :returns: the selected track, or None when a coordinate is missing
                  (falsy) or no track is nearby
        :rtype: Track
        """
        if not (latitude and longitude):
            return None

        location = GeoPoint.fromPoint(latitude, longitude)
        nearby_tracks = self.track_manager.find_nearby_tracks(location)
        remembered_ids = self.get_pref_track_selections()

        # Nearby tracks the user picked before take priority.
        preferred = [candidate for candidate in nearby_tracks
                     if candidate.track_id in remembered_ids]
        if preferred:
            track = preferred[0]
        else:
            track = next(iter(nearby_tracks), None)

        # only update the trackmap if it's changing
        if self.track != track:
            self._select_track(track)
        return track

Esempio n. 8

0

Mostra file

File: jsonpickle_test.py Progetto: achernet/jsonpickle

 def __reduce__(self):
     """Return a five-item reduce tuple that rebuilds via the instance's own class."""
     rebuild = type(self)  # callable
     rebuild_args = ('yam', 1)  # args
     state = {'foo': 1}  # state
     list_items = iter([])  # listitems
     dict_items = iter([])  # dictitems
     return (rebuild, rebuild_args, state, list_items, dict_items)

Esempio n. 9

0

Mostra file

File: jsonpickle_test.py Progetto: achernet/jsonpickle

 def __reduce__(self):
     """Return a five-item reduce tuple that rebuilds via PickleProtocol2ReduceTuple."""
     rebuild = PickleProtocol2ReduceTuple  # callable
     rebuild_args = ('yam', 1)  # args
     state = {'foo': 1}  # state
     list_items = iter([])  # listitems
     dict_items = iter([])  # dictitems
     return (rebuild, rebuild_args, state, list_items, dict_items)

Esempio n. 10

0

Mostra file

File: jsonpickle_test.py Progetto: achernet/jsonpickle

 def __reduce__(self):
     """Return a five-item reduce tuple that rebuilds via a plain function, with no state."""
     rebuild = protocol_2_reduce_tuple_func  # callable
     rebuild_args = ('yam', 1)  # args
     state = None  # state
     list_items = iter([])  # listitems
     dict_items = iter([])  # dictitems
     return (rebuild, rebuild_args, state, list_items, dict_items)

Esempio n. 11

0

Mostra file

File: jsonpickle_test.py Progetto: achernet/jsonpickle

 def __reduce__(self):
     """Return a five-item reduce tuple that rebuilds via ``__newobj__``, with no state."""
     rebuild = __newobj__  # callable
     rebuild_args = (PickleProtocol2ReduceNewobj, 'yam', 1)  # args
     state = None  # state
     list_items = iter([])  # listitems
     dict_items = iter([])  # dictitems
     return (rebuild, rebuild_args, state, list_items, dict_items)

Esempio n. 12

0

Mostra file

File: chain_decomposition.py Progetto: adrianN/edge-connectivity

def dfs(G,source=None):
	"""Yield (parent, child, tag) triples of an iterative depth-first search.

	tag is 'tree' when child is reached for the first time and 'back' when it
	was already visited. With source=None every node of G is tried as a start
	node, so all components are covered; otherwise only the component
	containing source is explored. G[node] must be iterable over the
	neighbours of node.
	"""
	# Very slight modification of the DFS procedure from networkx
	roots = G if source is None else [source]
	exhausted = object()  # marks a neighbour iterator that has run dry
	seen = set()
	for root in roots:
		if root in seen:
			continue
		seen.add(root)
		# Each stack entry is a node plus the iterator over its neighbours.
		pending = [(root, iter(G[root]))]
		while pending:
			node, neighbours = pending[-1]
			nxt = next(neighbours, exhausted)
			if nxt is exhausted:
				pending.pop()
			elif nxt in seen:
				yield node, nxt, 'back'
			else:
				yield node, nxt, 'tree'
				seen.add(nxt)
				pending.append((nxt, iter(G[nxt])))

Esempio n. 13

0

Mostra file

File: ann.py Progetto: taylorjacklespriggs/sigex

 def train(self, inp, out, training_weight=1.):
     """Run one forward pass and one backpropagation update for a single sample.

     inp and out are converted with np.mat(...).T (presumably 1-D sequences
     that become column matrices -- TODO confirm against callers).
     training_weight scales both the update step and the returned value.
     The weight and bias matrices stored in self.__weights are modified in
     place. Returns np.dot(tmp[0].T, tmp[0]) * .5 * training_weight, where
     tmp is the difference between the last entry of vals and out.
     """
     inp = np.mat(inp).T
     out = np.mat(out).T
     deriv = []
     val = inp
     # vals[0] is the input; one entry is appended per layer below.
     vals = [val]
     # forward calculation of activations and derivatives
     # NOTE(review): val is not rebound to the activation output, so the next
     # layer is fed weight*val + bias of this layer unless __activation
     # changes its argument in place -- TODO confirm.
     for weight,bias in self.__weights:
         val = weight*val
         val += bias
         deriv.append(self.__derivative(val))
         vals.append(self.__activation(val))
     # Walk derivatives and weights from the last layer towards the first.
     deriv = iter(reversed(deriv))
     weights = iter(reversed(self.__weights))
     # errs is built last layer first: errs[0] belongs to the output layer.
     errs = []
     errs.append(np.multiply(vals[-1]-out, next(deriv)))
     # backwards propagation of errors
     # deriv has already given up its first item, so each weight is paired
     # with the derivative of the layer before it.
     for (w,b),d in zip(weights, deriv):
         errs.append(np.multiply(np.dot(w.T, errs[-1]), d))
     # NOTE(review): this rebinding of weights is not read again below.
     weights = iter(self.__weights)
     # Pair every layer with its input (vals) and its error (errs reversed
     # back into first-to-last order); -= updates the stored matrices in place.
     for (w,b),v,e in zip(\
             self.__weights,\
             vals, reversed(errs)):
         e *= self.__learning_rate*training_weight
         w -= e*v.T
         b -= e
     tmp = vals[-1]-out
     # NOTE(review): only tmp[0] enters the returned value -- confirm this is
     # intended when out has more than one row.
     return np.dot(tmp[0].T,tmp[0])*.5*training_weight

Esempio n. 14

0

Mostra file

File: suite.py Progetto: LumaPictures/rez

def _isnotsuite(test):
    "Duck-typing check: True when *test* cannot be iterated (a test case), False when it can (a suite)."
    try:
        iter(test)
    except TypeError:
        iterable = False
    else:
        iterable = True
    return not iterable

Esempio n. 15

0

Mostra file

File: test_core.py Progetto: joyrexus/toolz

def test_count():
    """count() reports the number of items in sequences and in one-shot iterators."""
    cases = [
        ((1, 2, 3), 3),
        ([], 0),
        (iter((1, 2, 3, 4)), 4),
        ("hello", 5),
        (iter("hello"), 5),
    ]
    for seq, expected_length in cases:
        assert count(seq) == expected_length

Esempio n. 16

0

Mostra file

File: evidence.py Progetto: eggduzao/reg-gen

    def create_file(self):
        """Write ``<mpbs_name>.bed`` with every MPBS region tagged ":Y" or ":N".

        Each summit region read from self.tfbs_summit_fname is re-centred on
        its summit (last whitespace-separated field of region.data, added to
        region.initial) and extended by self.peak_ext / 2 on both sides, with
        the start clamped at 0. MPBS regions that intersect a summit region
        get the suffix ":Y", the remaining ones ":N". The merged, sorted set
        is written into self.output_location.
        """
        # Expanding summits
        summit_regions = GenomicRegionSet("TFBS Summit Regions")
        summit_regions.read_bed(self.tfbs_summit_fname)
        for peak in summit_regions:
            centre = int(peak.data.split()[-1]) + peak.initial
            peak.initial = max(centre - (self.peak_ext / 2), 0)
            peak.final = centre + (self.peak_ext / 2)

        # Calculating intersections
        motif_regions = GenomicRegionSet("MPBS Regions")
        motif_regions.read_bed(self.mpbs_fname)

        summit_regions.sort()
        motif_regions.sort()

        overlapping = motif_regions.intersect(summit_regions, mode=OverlapType.ORIGINAL)
        disjoint = motif_regions.subtract(summit_regions, whole_region=True)

        # Re-label every region: keep the part before the first ':' and
        # append the overlap flag.
        labelled = GenomicRegionSet("TFBS Regions")
        for subset, flag in ((overlapping, ":Y"), (disjoint, ":N")):
            for hit in subset:
                hit.name = hit.name.split(":")[0] + flag
                labelled.add(hit)
        labelled.sort()

        out_path = os.path.join(self.output_location, "{}.bed".format(self.mpbs_name))
        labelled.write_bed(out_path)

Esempio n. 17

0

Mostra file

File: sputrifis.py Progetto: jrevillas/spotify-mp3-ripper

	def run(self):
		"""Rip the track or playlist named by the link in sys.argv[3], then disconnect.

		Waits for the container to be loaded, resolves the link and rips each
		track in turn: load, rip_init, play, wait for the end-of-track event,
		rip_terminate, rip_id3. A link that is neither a track nor a playlist
		is reported and nothing is ripped.
		"""
		container_loaded.wait()
		container_loaded.clear()

		link = Link.from_string(sys.argv[3])
		link_type = link.type()
		if link_type == Link.LINK_TRACK:
			tracks = [link.as_track()]
		elif link_type == Link.LINK_PLAYLIST:
			playlist = link.as_playlist()
			print('loading playlist...')
			# Poll until the playlist has been loaded.
			while not playlist.is_loaded():
				time.sleep(0.1)
			print('done')
			tracks = playlist
		else:
			# Any other link type used to leave the track iterator unbound
			# and crash with a NameError before disconnecting.
			print('unsupported link: only track and playlist links can be ripped')
			tracks = []

		session = self.ripper.session
		for track in tracks:

			self.ripper.load_track(track)

			rip_init(session, track)

			self.ripper.play()

			# Block until the end of the track is signalled.
			end_of_track.wait()
			end_of_track.clear()

			rip_terminate(session, track)
			rip_id3(session, track)

		self.ripper.disconnect()

Esempio n. 18

0

Mostra file

File: config_flow.py Progetto: fbradyirl/home-assistant

    async def async_step_import(self, user_input):
        """Import a config entry.

        Aborts when an entry for this domain already exists. The scan
        interval is taken from ``user_input`` and its host is remembered
        unless it equals DOMAIN. If the legacy config file is missing, empty,
        or written for a different host, the flow continues with the user
        step; otherwise an entry is created from the first host/session pair
        in the file.
        """
        if self.hass.config_entries.async_entries(DOMAIN):
            return self.async_abort(reason='already_setup')

        self._scan_interval = user_input[KEY_SCAN_INTERVAL]
        if user_input[CONF_HOST] != DOMAIN:
            self._hosts.append(user_input[CONF_HOST])

        conf_path = self.hass.config.path(TELLDUS_CONFIG_FILE)
        # File access is blocking, so it runs in the executor.
        if not await self.hass.async_add_executor_job(
                os.path.isfile, conf_path):
            return await self.async_step_user()

        conf = await self.hass.async_add_executor_job(load_json, conf_path)
        # An empty file has no host entry; a bare next() would raise
        # StopIteration inside this coroutine.
        host = next(iter(conf), None)

        if host is None or user_input[CONF_HOST] != host:
            return await self.async_step_user()

        host = CLOUD_NAME if host == 'tellduslive' else host
        return self.async_create_entry(
            title=host,
            data={
                CONF_HOST: host,
                KEY_SCAN_INTERVAL: self._scan_interval.seconds,
                KEY_SESSION: next(iter(conf.values())),
            })

Esempio n. 19

0

Mostra file

File: test_main.py Progetto: pombredanne/apyori

 def load_transactions_mock(input_file, **kwargs):
     """Mock for apyori.load_transactions: check the delimiter and two input lines, yielding one transaction per line."""
     eq_(kwargs['delimiter'], delimiter)
     # Exactly two lines are expected; each is verified just before the
     # matching transaction is handed out.
     for position in (0, 1):
         eq_(next(input_file), inputs[position])
         yield iter(input_transactions[position])

Esempio n. 20

0

Mostra file

File: test_dedupe.py Progetto: beng/dedupe

  def setUp(self) :
    """Seed the RNG and build the id pairs, record pairs, data model and expected scores."""
    random.seed(123)

    id_pairs = [('1', '2'), ('2', '3'), ('4', '5'), ('6', '7'), ('8','9')]
    self.ids_str = iter(id_pairs)

    people = [{'name': 'Margret', 'age': '32'},
              {'name': 'Marga', 'age': '33'},
              {'name': 'Maria', 'age': '19'},
              {'name': 'Monica', 'age': '39'},
              {'name': 'Mira', 'age': '47'},
              {'name': 'Mona', 'age': '9'}]
    # Each record pair joins one person with the next; every pair gets its
    # own dict copies so no dict object is shared between pairs.
    self.records = iter([(dict(left), dict(right))
                         for left, right in zip(people, people[1:])])

    self.normalizedAffineGapDistance = dedupe.affinegap.normalizedAffineGapDistance

    name_field = {'Has Missing': False,
                  'type': 'String',
                  'comparator': self.normalizedAffineGapDistance,
                  'weight': -1.0302742719650269}
    field_definitions = dedupe.core.OrderedDict()
    field_definitions['name'] = name_field
    self.data_model = {'fields': field_definitions, 'bias': 4.76}

    score_dtype = [('pairs', 'S1', 2), ('score', 'f4', 1)]
    expected_scores = [0.96, 0.96, 0.78, 0.72, 0.84]
    self.desired_scored_pairs = numpy.array(
        [(list(pair), score) for pair, score in zip(id_pairs, expected_scores)],
        dtype=score_dtype)

Esempio n. 21

0

Mostra file

File: playfield.py Progetto: robmcmullen/omnivore

 def set_current_draw_pattern(self, pattern, control):
     """Store *pattern* as self.draw_pattern, wrapping a non-iterable value in a list.

     *control* is accepted but not used here.
     """
     try:
         iter(pattern)
     except TypeError:
         # A single value becomes a one-element pattern.
         pattern = [pattern]
     self.draw_pattern = pattern

Esempio n. 22

0

Mostra file

File: ipod.py Progetto: aguilajesus/plaso

  def GetEntries(self, parser_mediator, match=None, **unused_kwargs):
    """Produce a last-connected event for every device in the iPod plist.

    Devices without a truthy 'Connected' value are skipped. Every other key
    of a device becomes an attribute of the event data, lower-cased and with
    spaces replaced by underscores.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      match (Optional[dict[str: object]]): keys extracted from PLIST_KEYS.
    """
    for device_identifier, device_information in match.get('Devices', {}).items():
      connected = device_information.get('Connected', None)
      if not connected:
        continue

      event_data = IPodPlistEventData()
      event_data.device_id = device_identifier

      # TODO: refactor.
      for key, value in device_information.items():
        if key != 'Connected':
          setattr(event_data, key.lower().replace(' ', '_'), value)

      event = time_events.PythonDatetimeEvent(
          connected, definitions.TIME_DESCRIPTION_LAST_CONNECTED)
      parser_mediator.ProduceEventWithEventData(event, event_data)

Esempio n. 23

0

Mostra file

File: tagdirectory.py Progetto: chapmanb/MultiQC

    def tag_info_chart (self):

        """ Make the taginfo.txt plot.

        Passes the per-sample tag counts in self.tagdir_data['taginfo_total']
        to bargraph.plot together with a chromosome category list. The list
        uses UCSC names ("chr1" ... "chrM") when the first chromosome key of
        the first sample contains "chr", and Ensembl names ("1" ... "MT")
        otherwise.
        """

        ## TODO: human chrs on hg19. How will this work with GRCh genome or other, non human, genomes?
        # nice if they are ordered by size
        # Concatenate with "+": list.append() returns None and would also
        # nest the extra names as a single element.
        ucsc = ["chr" + str(i) for i in range(1, 23)] + ["chrX", "chrY", "chrM"]
        # String names, so they are the same kind of key as the UCSC ones.
        ensembl = [str(i) for i in range(1, 23)] + ["X", "Y", "MT"]
        pconfig = {
            'id': 'tagInfo',
            'title': 'Homer: Tag Info Distribution',
            'ylab': 'Tags',
            'cpswitch_counts_label': 'Number of Tags'
        }

        ## check if chromosomes starts with "chr" (UCSC) or "#" (ensembl)
        sample1 = next(iter(self.tagdir_data['taginfo_total']))
        chrFormat = next(iter(self.tagdir_data['taginfo_total'][sample1]))

        if ("chr" in chrFormat):
            chrs = ucsc
        else:
            chrs = ensembl

        return bargraph.plot(self.tagdir_data['taginfo_total'], chrs, pconfig)

Esempio n. 24

0

Mostra file

File: progress.py Progetto: oerms/RelaxSim

 def __init__(self, iterator, unit=None, computer_prefix=None, display=MULTI_LINE):
     """Create a new progress display.
     'iterator' is the iterator containing the work to be done.
     'unit' is the unit to be displayed to the user.
     'computer_prefix' should be set to True if this unit requires prefix
     increments of 1024 instead of the traditional 1000. If it is not set,
     then the class tries to guess based on 'unit'.
     'display' defaults to MULTI_LINE to print a new line for every update,
     or can be SINGLE_LINE to keep updating a single status line.
     """
     if hasattr(iterator, "__len__"):
         # This may be an expensive operation, for instance on a
         # hypothetical os.walk() which implements __len__.
         length = len(iterator)
         self.iterator = iter(iterator)
     else:
         # No length available: materialise the items once so they can be
         # counted, then iterate over the stored copy.
         items = list(iterator)
         length = len(items)
         self.iterator = iter(items)
     self.progress = Progress(length, unit, computer_prefix)
     self.display = display
     # The first call to next is before the work actually starts, so we
     # shouldn't increment() at that point.
     self.first = True

Esempio n. 25

0

Mostra file

File: seqio.py Progetto: smorfopoulou/viral_denovo_pipeline

	def __iter__(self):
		"""Yield (read1, read2) pairs taken in lockstep from the two readers.

		Raises FormatError when one reader holds more reads than the other or
		when the names of a pair differ. Names are compared up to the first
		whitespace, without a trailing two-character "/x" mate suffix.
		"""
		def base_name(read):
			# First whitespace-delimited token, minus a trailing "/x" suffix.
			token = read.name.split(None, 1)[0]
			return token[:-2] if token[-2:-1] == '/' else token

		# Avoid usage of zip() below since it will consume one item too many.
		it1, it2 = iter(self.reader1), iter(self.reader2)
		for r1 in it1:
			try:
				r2 = next(it2)
			except StopIteration:
				raise FormatError("Reads are improperly paired. There are more reads in file 1 than in file 2.")

			name1 = base_name(r1)
			name2 = base_name(r2)
			if name1 != name2:
				raise FormatError("Reads are improperly paired. Read name '{0}' in file 1 not equal to '{1}' in file 2.".format(name1, name2))
			yield (r1, r2)

		# End of file 1. Make sure that file 2 is also at end.
		try:
			next(it2)
		except StopIteration:
			return
		raise FormatError("Reads are improperly paired. There are more reads in file 2 than in file 1.")

Esempio n. 26

0

Mostra file

File: test_origin.py Progetto: dhersam/sos

    def test_cdn_get_no_content(self):
        """A HEAD through the CDN host relays the status of the object request (304, 404, 416)."""
        prev_data = json.dumps({'account': 'acc', 'container': 'cont',
                'ttl': 1234, 'logs_enabled': True, 'cdn_enabled': True})
        # Same request each time; only the status of the object call changes.
        object_responses = (
            ('304 No Content', 304),
            ('404 No Content', 404),
            ('416 No Content', 416),
        )
        for status_line, expected_status in object_responses:
            self.test_origin.app = FakeApp(iter([
                ('204 No Content', {}, prev_data), # call to _get_cdn_data
                (status_line, {}, '')])) #call to get obj
            req = Request.blank('http://1234.r34.origin_cdn.com:8080/obj1.jpg',
                environ={'REQUEST_METHOD': 'HEAD',
                         'swift.cdn_hash': 'abcd',
                         'swift.cdn_object_name': 'obj1.jpg'})
            resp = req.get_response(self.test_origin)
            self.assertEqual(resp.status_int, expected_status)

Esempio n. 27

0

Mostra file

File: test_origin.py Progetto: dhersam/sos

    def test_cdn_get_regex(self):
        """A GET with a hash in the host name reaches the object; a URL without one gets 404."""
        prev_data = json.dumps({'account': 'acc', 'container': 'cont',
                'ttl': 1234, 'logs_enabled': True, 'cdn_enabled': True})

        def check_urls(req):
            # The backend request must address acc/cont/obj1.jpg.
            vrs, acc, cont, obj = utils.split_path(req.path, 1, 4)
            self.assertEqual(acc, 'acc')
            self.assertEqual(cont, 'cont')
            self.assertEqual(obj, 'obj1.jpg')

        url_cases = (
            ('http://1234.r3.origin_cdn.com:8080/obj1.jpg', 304),
            ('http://r3.origin_cdn.com:8080/nohash/obj1.jpg', 404),
        )
        for url, expected_status in url_cases:
            self.test_origin.app = FakeApp(iter([
                ('204 No Content', {}, prev_data), # call to _get_cdn_data
                ('304 No Content', {}, '', check_urls)])) #call to get obj
            req = Request.blank(url,
                environ={'REQUEST_METHOD': 'GET'})
            resp = req.get_response(self.test_origin)
            self.assertEqual(resp.status_int, expected_status)

Esempio n. 28

0

Mostra file

File: rst_out.py Progetto: rciorba/moin-2.0-mirror

    def open_moinpage_part(self, elem):
        """Render a moin-page ``part`` element as reStructuredText.

        A content type of the form "x-moin/macro;name=NAME" is turned into a
        substitution reference, with the matching definition appended to
        self.objects. "x-moin/format;name=NAME" becomes a ".. parser:NAME"
        block. Anything else falls back to the element's alt text.
        """
        # Named content_type rather than type, to avoid shadowing the builtin.
        content_type = elem.get(moin_page.content_type, u"").split(u';')
        if len(content_type) == 2:
            if content_type[0] == u"x-moin/macro":
                # next(iter(...)) fetches the first child; it works on
                # Python 2.6+ and 3, unlike the .next() method.
                if len(elem) and next(iter(elem)).tag.name == "arguments":
                    alt = u"<<{0}({1})>>".format(content_type[1].split(u'=')[1], u','.join(
                        [u''.join(c.itertext()) for c in next(iter(elem)) if c.tag.name == "argument"]))
                else:
                    alt = u"<<{0}()>>".format(content_type[1].split(u'=')[1])

                obj = u".. |{0}| macro:: {1}".format(alt, alt)
                self.objects.append(obj)
                return u" |{0}| ".format(alt)
            elif content_type[0] == u"x-moin/format":
                elem_it = iter(elem)
                ret = u"\n\n.. parser:{0}".format(content_type[1].split(u'=')[1])
                if len(elem) and next(elem_it).tag.name == "arguments":
                    args = []
                    for arg in next(iter(elem)):
                        if arg.tag.name == "argument":
                            args.append(u"{0}=\"{1}\"".format(arg.get(moin_page.name, u""), u' '.join(arg.itertext())))
                    ret = u'{0} {1}'.format(ret, u' '.join(args))
                    # The body is the child after the arguments element.
                    # NOTE(review): raises StopIteration when the arguments
                    # element is the only child -- TODO confirm this cannot
                    # happen.
                    elem = next(elem_it)
                ret = u"{0}\n  {1}".format(ret, u' '.join(elem.itertext()))
                return ret
        return elem.get(moin_page.alt, u'') + u"\n"

Esempio n. 29

0

Mostra file

File: test_origin.py Progetto: dhersam/sos

    def test_origin_db_post_fail(self):
        """A container PUT answers 500 when any backend call in the sequence returns 404."""
        failing_sequences = (
            [
                ('204 No Content', {}, ''), # call to _get_cdn_data
                ('404 Not Found', {}, ''), # put to .hash
            ],
            [
                ('204 No Content', {}, ''), # call to _get_cdn_data
                ('204 No Content', {}, ''), # put to .hash
                ('404 Not Found', {}, ''), # HEAD check to list container
                ('404 Not Found', {}, ''), # PUT to list container
            ],
            [
                ('204 No Content', {}, ''), # call to _get_cdn_data
                ('204 No Content', {}, ''), # put to .hash
                ('204 No Content', {}, ''), # HEAD check to list container
                ('404 Not Found', {}, ''), # PUT to list container
            ],
        )
        for responses in failing_sequences:
            self.test_origin.app = FakeApp(iter(responses))
            req = Request.blank('http://origin_db.com:8080/v1/acc/cont',
                environ={'REQUEST_METHOD': 'PUT'})
            resp = req.get_response(self.test_origin)
            self.assertEqual(resp.status_int, 500)

Esempio n. 30

0

Mostra file

File: RHUI-STRESS-DOWNLOAD.py Progetto: AvijitCGit/azure-linux-automation

def AnalyseResult(l_download):
	"""Log a summary of download results and record an overall PASS or FAIL.

	l_download is a sequence of entries whose first item is 'success', 'fail'
	or anything else (counted as a timeout) and whose second item is the
	download cost. Nothing is logged for an empty sequence. Any exception
	raised on the way is printed rather than propagated.
	"""
	try:
		if len(l_download) == 0:
			return

		succeeded = failed = timed_out = 0
		cost_of_valid_download = []
		for entry in l_download:
			outcome = entry[0]
			if outcome == 'success':
				succeeded += 1
				cost_of_valid_download.append(entry[1])
			elif outcome == 'fail':
				failed += 1
			else:
				timed_out += 1

		# summary
		banner = '-'*30
		logger.info(banner + "SUMMARY" + banner)
		logger.info('Total Download: %s, Success: %s, Fail: %s, Timeout: %s' % (len(l_download),succeeded,failed,timed_out))
		if len(cost_of_valid_download):
			logger.info('\tThe fastest download in %s seconds' % min(cost_of_valid_download))
			logger.info('\tThe slowest download in %s seconds' % max(cost_of_valid_download))
			logger.info('\tThe average download in %s seconds' % str(sum(cost_of_valid_download)/len(cost_of_valid_download)))
		else:
			logger.error('\tNone valid download!!!')

		# The run passes only when every download succeeded.
		if failed == 0 and timed_out == 0:
			ResultLog.info('PASS')
		else:
			ResultLog.error('FAIL')
	except Exception as err:
		print(err)

Esempio n. 31

0

Mostra file

File: digit_pytorch.py Progetto: Fazleem/Pytorch_PytorchLightning

# Dataset & Dataloader
train_dataset = torchvision.datasets.MNIST(
    root="./data", train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = torchvision.datasets.MNIST(
    root="./data", train=False, transform=transforms.ToTensor()
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

# look at one batch of data, using iter we can see one batch of data
examples = iter(train_loader)
# Use the built-in next(): the iterator protocol only guarantees __next__,
# not a .next() method.
samples, labels = next(examples)
print(samples.shape, labels.shape)

# Put the first five samples of the batch on a 3x2 subplot grid.
for i in range(5):
    plt.subplot(3, 2, i + 1)
    plt.imshow(samples[i][0])
# plt.show()


# model building
class DigitNet(nn.Module):
    """Network module; the lines shown here set up one linear layer and a ReLU."""
    def __init__(self, input_size, hidden_size, output_classes):
        # NOTE(review): output_classes is not used in the lines visible here;
        # presumably it sizes a later layer -- TODO confirm.
        super(DigitNet, self).__init__()
        # Both sub-modules are moved to the CUDA device on creation.
        self.layer1 = nn.Linear(input_size, hidden_size).cuda()
        self.relu = nn.ReLU().cuda()

Esempio n. 32

0

Mostra file

File: common.py Progetto: Achillesxu/appstore

def md5sum(filename, block_size=65536):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is read in chunks of *block_size* bytes, so large files are
    never loaded into memory at once.
    """
    my_hash = hashlib.md5()
    # Read-only binary mode is enough; "r+b" needlessly required write access.
    with open(filename, "rb") as f:
        # The sentinel must be bytes: a binary read returns b"" at end of
        # file, which never equals "" on Python 3 and would loop forever.
        for block in iter(lambda: f.read(block_size), b""):
            my_hash.update(block)
    return my_hash.hexdigest()

Esempio n. 33

0

Mostra file

def scanlist(iprange, portranges, methods):
    """Yield every (ip, port, method) combination, with ips varying slowest and methods fastest."""
    targets = (
        (address, port_number, verb)
        for address in iprange
        for ports in portranges
        for port_number in ports
        for verb in methods
    )
    for target in targets:
        yield target

Esempio n. 34

0

Mostra file

            name = '_sip._' + proto + '.' + domainname + '.'
            try:
                log.debug('trying to resolve SRV for %s' % name)
                ans = dns.resolver.query(name, 'SRV')
            except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer), err:
                log.info('Could not resolve %s' % name)
                continue
            for a in ans.response.answer:
                log.info('got an answer %s' % a.to_text())
                for _tmp in a:
                    for method in methods:
                        try:
                            hostname = socket.gethostbyname(
                                _tmp.target.to_text())
                        except socket.error:
                            log.warn("%s could not be resolved" %
                                     _tmp.target.to_text())
                            continue
                        log.debug("%s resolved to %s" %
                                  (_tmp.target.to_text(), hostname))
                        yield (hostname, _tmp.port, method)


if __name__ == '__main__':
    # Manual smoke test: print the range tuple for a literal network, then
    # every address of a resolved host's network.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(getranges('1.1.1.1/24'))
    seq = getranges('google.com/24')
    if seq is not None:
        for x in ip4range(seq):
            print(x)

Esempio n. 35

0

Mostra file

File: test_trainer_tricks.py Progetto: sudohainguyen/pytorch-lightning

def test_overfit_batch_limits(tmpdir):
    """Check batch limits applied by ``overfit_batches`` and ``limit_*_batches``.

    Covers the train loader as well as the val/test loaders: the number of
    batches must match the requested int or fraction, eval loaders must use
    a sequential sampler, and the first batch must equal the first batch of
    the unshuffled training data.
    """
    # -- the template's loaders start with the expected samplers ----------
    model = EvalModelTemplate()
    model.train_dataloader()

    initial_train_loader = model.train_dataloader()
    assert isinstance(initial_train_loader.sampler, RandomSampler)
    assert isinstance(model.val_dataloader().sampler, SequentialSampler)
    assert isinstance(model.test_dataloader().sampler, SequentialSampler)

    # -- unshuffled train loader and its reference first batch ------------
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=False)
    num_train_samples = int(0.11 * len(train_loader))
    xa, ya = next(iter(train_loader))

    # -- unshuffled val / test loaders, keyed by split name ---------------
    eval_loaders = {
        'val': DataLoader(model.val_dataloader().dataset, shuffle=False),
        'test': DataLoader(model.test_dataloader().dataset, shuffle=False),
    }

    # make the model hand out exactly these loaders
    model.train_dataloader = lambda: train_loader
    model.val_dataloader = lambda: eval_loaders['val']
    model.test_dataloader = lambda: eval_loaders['test']

    def assert_first_batch_is_reference(loader):
        """The first batch of *loader* must equal the reference batch."""
        xb, yb = next(iter(loader))
        assert torch.eq(xa, xb).all()
        assert torch.eq(ya, yb).all()

    def reset_eval(split, **trainer_kwargs):
        """Build a fresh Trainer and reset its eval loader for *split*."""
        return Trainer(**trainer_kwargs)._reset_eval_dataloader(model, split)

    # -- train loader: overfit_batches given as an int --------------------
    trainer = Trainer(overfit_batches=4)
    trainer.reset_train_dataloader(model)
    assert trainer.num_training_batches == 4
    assert_first_batch_is_reference(trainer.train_dataloader)

    # -- train loader: overfit_batches given as a fraction ----------------
    trainer = Trainer(overfit_batches=0.11)
    trainer.reset_train_dataloader(model)
    assert trainer.train_dataloader is train_loader
    assert trainer.num_training_batches == num_train_samples
    assert_first_batch_is_reference(trainer.train_dataloader)

    # -- val and test loaders ----------------------------------------------
    limit_option = {'val': 'limit_val_batches', 'test': 'limit_test_batches'}
    for split in ('val', 'test'):

        # overfit_batches as a fraction
        batch_counts, loaders = reset_eval(split, overfit_batches=0.11)
        assert batch_counts[0] == num_train_samples

        # shuffling must have been turned off for the user
        assert isinstance(loaders[0].sampler, SequentialSampler)
        assert_first_batch_is_reference(loaders[0])

        # overfit_batches as an int
        for whole_batches in (1, 5):
            batch_counts, loaders = reset_eval(split, overfit_batches=whole_batches)
            assert batch_counts[0] == whole_batches

        # limit_<split>_batches as a fraction, then as an int
        batch_counts, loaders = reset_eval(split, **{limit_option[split]: 0.1})
        assert batch_counts[0] == int(0.1 * len(eval_loaders[split]))

        batch_counts, loaders = reset_eval(split, **{limit_option[split]: 10})
        assert batch_counts[0] == 10

Esempio n. 36

0

Mostra file

File: __init__.py Progetto: sourabhtk37/insights-core

 def __init__(self, stream):
     """Store an iterator over *stream* together with an empty buffer.

     ``pos`` starts at 0 and ``buffer`` starts empty.
     """
     # iter() is applied once up front, so a non-iterable *stream* raises
     # TypeError here rather than on first use.
     self.stream = iter(stream)
     self.buffer = []
     self.pos = 0

Esempio n. 37

0

Mostra file

File: __init__.py Progetto: sourabhtk37/insights-core

 def __iter__(self):
     """Return an iterator over ``self.children`` (delegates to ``iter()``)."""
     return iter(self.children)

Esempio n. 38

0

Mostra file

File: prepare_data.py Progetto: unixunion/nmt_project

def read_lines(file, amount, fillvalue=None):
    """Group the items of *file* into tuples of *amount* consecutive items.

    A single shared iterator is handed to ``zip_longest`` *amount* times, so
    each output tuple pulls the next *amount* items from *file*.  The final
    tuple is padded with *fillvalue* when the input runs out early.
    """
    shared_source = iter(file)
    slots = (shared_source,) * amount
    return zip_longest(*slots, fillvalue=fillvalue)

Esempio n. 39

0

Mostra file

File: mythproto.py Progetto: sir-maniac/mythtv

 def sortedrun(self, *args, **kwargs):
     """Return an iterator over the results of ``self.run`` ordered by ``starttime``.

     All positional and keyword arguments are forwarded to ``self.run``.
     """
     def by_starttime(entry):
         return entry.starttime

     results = list(self.run(*args, **kwargs))
     results.sort(key=by_starttime)
     return iter(results)

Esempio n. 40

0

Mostra file

File: graph.py Progetto: codeaudit/DeepCube

 def __iter__(self):
     """Return an iterator over ``self.variables`` (delegates to ``iter()``)."""
     return iter(self.variables)

Esempio n. 41

0

Mostra file

File: ismags.py Progetto: Cold5nap/sobolIter

    def _largest_common_subgraph(self, candidates, constraints, to_be_mapped=None):
        """
        Find all largest common subgraphs honoring constraints.

        This is a recursive generator.  For every node set in `to_be_mapped`
        it yields whatever `self._map_nodes` produces; if nothing was
        produced at the current size (and that size is larger than 1) it
        recurses with every node set reduced by one node.

        Parameters
        ----------
        candidates
            Indexed by subgraph node; each value is an iterable of sized
            items (it is consumed via ``min(candidates[n], key=len)``).
            Forwarded to `self._map_nodes`.
        constraints
            Forwarded unchanged to `self._map_nodes` and `self._remove_node`.
        to_be_mapped : set of frozenset, optional
            Node sets of the subgraph still to be mapped.  Defaults to a
            single frozenset holding all nodes of ``self.subgraph``.

        Yields
        ------
        The items produced by `self._map_nodes`.
        """
        if to_be_mapped is None:
            to_be_mapped = {frozenset(self.subgraph.nodes)}

        # The LCS problem is basically a repeated subgraph isomorphism problem
        # with smaller and smaller subgraphs. We store the nodes that are
        # "part of" the subgraph in to_be_mapped, and we make it a little
        # smaller every iteration.

        # pylint disable because it's guarded against by default value
        current_size = len(
            next(iter(to_be_mapped), [])
        )  # pylint: disable=stop-iteration-return

        found_iso = False
        if current_size <= len(self.graph):
            # There's no point in trying to find isomorphisms of
            # graph >= subgraph if subgraph has more nodes than graph.

            # Try the isomorphism first with the nodes with lowest ID. So sort
            # them. Those are more likely to be part of the final
            # correspondence. This makes finding the first answer(s) faster. In
            # theory.
            for nodes in sorted(to_be_mapped, key=sorted):
                # Find the isomorphism between subgraph[to_be_mapped] <= graph
                next_sgn = min(nodes, key=lambda n: min(candidates[n], key=len))
                isomorphs = self._map_nodes(
                    next_sgn, candidates, constraints, to_be_mapped=nodes
                )

                # This is effectively `yield from isomorphs`, except that we look
                # whether an item was yielded.
                try:
                    item = next(isomorphs)
                except StopIteration:
                    pass
                else:
                    yield item
                    yield from isomorphs
                    # Only reached once the consumer has drained `isomorphs`.
                    found_iso = True

        # BASECASE
        if found_iso or current_size == 1:
            # Shrinking has no point because either 1) we end up with a smaller
            # common subgraph (and we want the largest), or 2) there'll be no
            # more subgraph.
            return

        left_to_be_mapped = set()
        for nodes in to_be_mapped:
            for sgn in nodes:
                # We're going to remove sgn from to_be_mapped, but subject to
                # symmetry constraints. We know that for every constraint we
                # have those subgraph nodes are equal. So whenever we would
                # remove the lower part of a constraint, remove the higher
                # instead. This is all dealt with by _remove_node. And because
                # left_to_be_mapped is a set, we don't do double work.

                # And finally, make the subgraph one node smaller.
                # REDUCTION
                new_nodes = self._remove_node(sgn, nodes, constraints)
                left_to_be_mapped.add(new_nodes)
        # COMBINATION
        yield from self._largest_common_subgraph(
            candidates, constraints, to_be_mapped=left_to_be_mapped
        )

Esempio n. 42

0

Mostra file

File: prepare_data.py Progetto: unixunion/nmt_project

def prepare():
    """Prepare the training data set, vocab files and projector config.

    Configuration is read from the module-level ``hparams`` and
    ``preprocessing`` mappings.  The function rebinds the module globals
    ``vocab`` and ``written_lines`` and returns nothing.

    Stages, in order:

    1. For each of the six train/dev/test x src/tgt files, read lines from
       ``preprocessing['source_folder']`` in batches, run ``tokenize`` and
       ``sentence_split`` through a process pool, and write the result to
       ``preprocessing['train_folder']`` using background threads.
    2. If ``preprocessing['use_bpe']`` is set, learn BPE joins from the
       collected train vocab, save them as JSON, apply them to all six
       files and delete the non-BPE intermediates.
    3. Write the vocab files (first ``vocab_size`` entries) and the
       ``vocab_unused`` files (the remainder).
    4. Write ``projector_config.pbtxt`` into ``<out_dir>/train_log``.
    """
    global vocab, written_lines

    # Files to be prepared
    # Maps output file name -> {'amount': fraction of samples, 'up_to': hard cap (-1 = none)}
    files = {
        '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['src']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': 1, 'up_to': -1},
        # copy all of data (up to "samples")
        '{}.{}'.format(hparams['dev_prefix'].replace('.bpe', ''), hparams['src']).replace(preprocessing['train_folder'],
                                                                                          '').lstrip('\\/'): {
            'amount': .1, 'up_to': preprocessing['test_size']},  # copy 1/10th but up to 'test_size'
        '{}.{}'.format(hparams['test_prefix'].replace('.bpe', ''), hparams['src']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': .1, 'up_to': preprocessing['test_size']},
        '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['tgt']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': 1, 'up_to': -1},
        '{}.{}'.format(hparams['dev_prefix'].replace('.bpe', ''), hparams['tgt']).replace(preprocessing['train_folder'],
                                                                                          '').lstrip('\\/'): {
            'amount': .1, 'up_to': preprocessing['test_size']},
        '{}.{}'.format(hparams['test_prefix'].replace('.bpe', ''), hparams['tgt']).replace(
            preprocessing['train_folder'], '').lstrip('\\/'): {'amount': .1, 'up_to': preprocessing['test_size']},
    }

    # pprint.pformat(files, indent=4)

    print(colorama.Fore.GREEN + "\nPreparing training set from raw set" + colorama.Fore.RESET)

    # Ensure that train folder exists
    try:
        os.makedirs(preprocessing['train_folder'])
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Ensure that model/log folder exists
    train_log_dir = os.path.join(hparams['out_dir'], 'train_log')
    try:
        os.makedirs(train_log_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Holds one vocab Counter per language key (filled for the train files only)
    data_vocab = Counter()

    # Iterate through files and prepare them
    for file_name, amounts in files.items():

        # Fresh per-file vocab; bound to the module global declared above
        # (presumably filled by append_vocab - TODO confirm)
        vocab = Counter()

        print("File: {}{}{}".format(colorama.Fore.GREEN, file_name, colorama.Fore.RESET))

        # Output file handler
        # NOTE(review): this handle is never closed inside this function
        out_file = open('{}/{}'.format(preprocessing['train_folder'], file_name), 'w', encoding='utf-8',
                        buffering=131072)

        # Maximum number of lines
        # (10 ** 20 stands in for "no limit" when samples / up_to are not positive)
        read = 0
        amount = int(min(amounts['amount'] * preprocessing['samples'] if preprocessing['samples'] > 0 else 10 ** 20,
                         amounts['up_to'] if amounts['up_to'] > 0 else 10 ** 20))

        # Prepare thread variables
        write_thread = None
        vocab_thread = None
        written_lines = 0

        # We are going to use multiprocessing for tokenization, as it's cpu intensive
        with Pool(processes=preprocessing['cpu_count']) as pool:

            # Count number of lines in file
            # NOTE(review): the file opened for counting is not explicitly closed
            progress = tqdm(ascii=True, unit=' lines', total=min(amount, sum(1 for _ in open(
                '{}/{}'.format(preprocessing['source_folder'], file_name), 'r', encoding='utf-8', buffering=131072))))

            # Open input file
            with open('{}/{}'.format(preprocessing['source_folder'], file_name), 'r', encoding='utf-8',
                      buffering=131072) as in_file:

                last_batch = False

                # Iterate every 10k lines
                for rows in read_lines(in_file, 10000, ''):

                    # If number of lines is greater than limit - break
                    read += len(rows)
                    if read >= amount:
                        # Keep only as many rows as are still allowed by the limit
                        rows = rows[:amount - read + len(rows)]
                        last_batch = True

                    # Process using multiprocessing
                    rows = pool.map(tokenize, rows, 100)

                    # Process vocab using multiprocessing
                    vocab_part = pool.map(sentence_split, rows, 100)

                    # Join running threads from previous loop
                    if write_thread is not None:
                        write_thread.join()
                        vocab_thread.join()
                        # presumably written_lines is updated by write_lines - TODO confirm
                        progress.update(written_lines)

                    # Thread for vocab update
                    vocab_thread = Thread(target=append_vocab, args=(vocab_part,))
                    vocab_thread.start()

                    # And thread for saving tokenized data to output file
                    write_thread = Thread(target=write_lines, args=(out_file, rows, written_lines == 0))
                    write_thread.start()

                    # Last batch - break / exit loop
                    if last_batch:
                        break

                # Join running threads and update progress bar
                # NOTE(review): if the loop above never ran (no batches), write_thread is
                # still None here and .join() raises AttributeError
                write_thread.join()
                vocab_thread.join()
                progress.update(written_lines)
                progress.close()

        # If it's train file, save vocab
        if file_name == '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['src']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'):
            data_vocab[hparams['src']] = vocab
        elif file_name == '{}.{}'.format(hparams['train_prefix'].replace('.bpe', ''), hparams['tgt']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'):
            data_vocab[hparams['tgt']] = vocab

    # If joined vocab - add counters
    if preprocessing['joined_vocab']:
        data_vocab[hparams['src']] += data_vocab[hparams['tgt']]
        del data_vocab[hparams['tgt']]

    # BPE/WPM-like tokenization
    # inspired by and based on https://github.com/rsennrich/subword-nmt
    if preprocessing['use_bpe']:

        print(colorama.Fore.GREEN + "\nLearning BPE" + colorama.Fore.RESET)

        # List of subword joins to be applied to training data
        joins = {}

        # Final train vocab for NMT
        train_vocab = {}

        # Learn BPE for both vocabs (or common vocab)
        for source, raw_vocab in data_vocab.items():

            # Pair stats
            stats = Counter()

            # Pair indexes
            # (pair -> {index into `vocab` -> occurrence count of the pair in that entity})
            indices = defaultdict(lambda: defaultdict(int))

            # Build 'new' vocab used for BPE learning (train_vocab will be a final vocab for NMT)
            vocab = []
            train_vocab[source] = Counter()

            # Build vocab for BPE learning purpose
            print("Building temporary vocab ({})".format(hparams['src'] if preprocessing['joined_vocab'] else source))
            for i, (entity, freq) in tqdm(enumerate(raw_vocab.most_common()), ascii=True, unit=' tokens'):

                # Split vocab token
                entity = tuple(entity.split())

                # Make pairs ("ABCD" -> (A, B), (B, C), (C, D)), stats, indexes and train vocab
                prev_char = entity[0]
                train_vocab[source][prev_char] += freq
                for char in entity[1:]:
                    stats[prev_char, char] += freq
                    indices[prev_char, char][i] += 1
                    train_vocab[source][char] += freq
                    prev_char = char
                vocab.append((entity, freq))

            print("Learning BPE for vocab of {} tokens".format(preprocessing['vocab_size']))

            # List of joins per vocab
            joins[source] = []

            # Partial stats speeds up learning process - optimization for 'max' above
            # Seeded with two placeholder keys ('' and -1, count 1 each) so that
            # most_common() has something to return before the first refresh
            partial_stats = Counter(['', -1])
            partial_stats_min = 0
            update_partial_stats = True

            # Current number of vocab tokens
            train_vocab_len = prev_train_vocab_len = len(train_vocab[source])

            # Progress bar
            progress = tqdm(ascii=True, unit=' tokens', total=preprocessing['vocab_size'], maxinterval=0.1, miniters=10)
            progress.monitor_interval = 1
            progress.update(prev_train_vocab_len)

            # Learn until vocab will contain desired number of tokens
            while train_vocab_len < preprocessing['vocab_size']:

                clean_train_vocab = False

                # Get most frequent pair
                most_frequent, freq = partial_stats.most_common(1)[0]

                # Update partial stats or frequency of most frequent pair is less than saved minimum for partial stats
                if update_partial_stats or freq <= partial_stats_min:
                    # New threshold: count of the last of (up to) 500 most common partial entries
                    partial_stats_min = partial_stats.most_common(500)[-1][1]
                    partial_stats = Counter()
                    for k, v in stats.most_common():
                        if v < partial_stats_min:
                            break
                        partial_stats[k] = v
                    update_partial_stats = False

                    # Get most frequent pair (again, proper one this time)
                    most_frequent, _ = partial_stats.most_common(1)[0]

                # If frequency is lower than 2 - exit
                if stats[most_frequent] < 2:
                    print(
                        'No pair has frequency greater than 1. Stopping earlier, your vocab file will include less tokens.\n')
                    break

                # Replace pair "A B" with new entity "AB"

                # Changes made
                changes = []

                # Replace regex
                # (the lookarounds make the pair match only on whitespace-delimited token boundaries)
                pattern = re.compile(r'(?<!\S)' + re.escape(' '.join(most_frequent)) + r'(?!\S)')

                # Loop through indices
                for j, freq in indices[most_frequent].items():

                    # Do not touch not existent pairs
                    if freq < 1:
                        continue

                    # Get entity and frequency
                    entity, freq = vocab[j]

                    # Replace "A B" with "AB" in entity
                    new_entity = pattern.sub(''.join(most_frequent), ' '.join(entity))
                    new_entity = tuple(new_entity.split())

                    # Update entity
                    vocab[j] = (new_entity, freq)

                    changes.append((j, new_entity, entity, freq))

                # Update indices and pair stats
                # Merged pair doesn't exist anymore
                stats[most_frequent] = 0
                partial_stats[most_frequent] = 0
                indices[most_frequent] = defaultdict(int)

                # Get entities and a new pair
                first, second = most_frequent
                new_pair = first + second

                # Iterate through all changes
                for j, entity, old_entity, freq in changes:

                    # Find all occurrences of first pair entity
                    # `prev` holds the index of the last handled occurrence (-2 = none yet)
                    prev = -2
                    for i in iter([i for i, entity in enumerate(old_entity) if entity == first]):

                        # Do not touch second "B B" if "B B B"
                        if i == prev + 1:
                            continue

                        # Check if second pair entity follows first one
                        if i < len(old_entity) - 1 and old_entity[i + 1] == second:

                            # Reduce frequency of "A B" in "A B C D" where "B C" is a merged pair
                            if i:
                                # `prev` is reused here as a temporary pair key; it is reset to
                                # the index `i` at the end of this branch
                                prev = old_entity[i - 1:i + 1]
                                stats[prev] -= freq
                                partial_stats[prev] = stats[prev]
                                indices[prev][j] -= 1

                            # Reduce frequency of "C D" in "A B C D" where "B C" is a merged pair
                            if i < len(old_entity) - 2:

                                # But do not touch "C B" if "A B C B C" as values will be adjusted with next occurrence of "B C" pair
                                if old_entity[i + 2] != first or i >= len(old_entity) - 3 or old_entity[
                                    i + 3] != second:
                                    # NOTE(review): the local name `next` shadows the builtin
                                    # for the rest of this function
                                    next = old_entity[i + 1:i + 3]
                                    stats[next] -= freq
                                    partial_stats[next] = stats[next]
                                    indices[next][j] -= 1

                            prev = i

                            # A count dropping to zero or below means the token must be purged later
                            if train_vocab[source][first] <= freq or train_vocab[source][second] <= freq:
                                clean_train_vocab = True
                            train_vocab[source][first] -= freq
                            train_vocab[source][second] -= freq

                    # Find all occurrences of the newly merged entity
                    for i in [i for i, entity in enumerate(entity) if entity == new_pair]:

                        # Increase frequency of (new pair) "A BC" in "A BC D"
                        if i:
                            prev = entity[i - 1:i + 1]
                            stats[prev] += freq
                            if stats[prev] > partial_stats_min:
                                update_partial_stats = True
                            indices[prev][j] += 1

                        # Increase frequency of (new pair) "BC D" in "A BC D", but do not touch if "A BC BC" as stats for "BC BC" will be adjusted with next occurrence of "BC" pair
                        if i < len(entity) - 1 and entity[i + 1] != new_pair:
                            next = entity[i:i + 2]
                            stats[next] += freq
                            if stats[next] > partial_stats_min:
                                update_partial_stats = True
                            indices[next][j] += 1

                        # Set frequency of a new pair
                        train_vocab[source][new_pair] += freq

                # Current pair is merged - is not a pair anymore, so has frequency of 0
                stats[most_frequent] = 0
                partial_stats[most_frequent] = 0

                # Remove (from training vocab) tokens with frequency of 0
                # (unary plus on a Counter keeps only entries with positive counts)
                if clean_train_vocab:
                    train_vocab[source] = +train_vocab[source]

                # Calculate current number of train vocab entities
                prev_train_vocab_len = train_vocab_len
                train_vocab_len = len(train_vocab[source])
                train_vocab_len_diff = train_vocab_len - prev_train_vocab_len

                # Update progress bar
                if train_vocab_len_diff >= 0:
                    progress.update(train_vocab_len_diff)

                # For a negative number set new value directly - tqdm doesn't support negative updates
                else:
                    progress.n += train_vocab_len_diff
                    progress.refresh()

                # Add new join pair
                joins[source].append(most_frequent)

            # Save list of joins for train vocab
            # (pair -> position in learning order; built from the reversed list so that,
            # for a repeated pair, the earliest position is the one that is kept)
            joins[source] = dict(reversed([(v, i) for i, v in enumerate(joins[source])]))

            # Done
            progress.close()

        # Save list of joins to a file (joined vocab) and replace main vocabs
        if preprocessing['joined_vocab']:
            with open('{}/{}'.format(preprocessing['train_folder'], 'bpe_joins.common.json'), 'w', encoding='utf-8',
                      buffering=131072) as bpe_file:
                # Keys are tuples, so they are JSON-encoded into strings first
                json.dump({json.dumps(k): v for k, v in joins[hparams['src']].items()}, bpe_file)
            data_vocab[hparams['src']] = train_vocab[hparams['src']]

        # Save list of joins to files (separated vocab)
        else:
            with open('{}/{}'.format(preprocessing['train_folder'], 'bpe_joins.{}.json'.format(hparams['src'])), 'w',
                      encoding='utf-8', buffering=131072) as bpe_file:
                json.dump({json.dumps(k): v for k, v in joins[hparams['src']].items()}, bpe_file)
            with open('{}/{}'.format(preprocessing['train_folder'], 'bpe_joins.{}.json'.format(hparams['tgt'])), 'w',
                      encoding='utf-8', buffering=131072) as bpe_file:
                json.dump({json.dumps(k): v for k, v in joins[hparams['tgt']].items()}, bpe_file)
            data_vocab[hparams['src']] = train_vocab[hparams['src']]
            data_vocab[hparams['tgt']] = train_vocab[hparams['tgt']]

        print(colorama.Fore.GREEN + "\nApplying BPE" + colorama.Fore.RESET)

        # BPE files to be prepared
        bpe_files = [
            '{}.{}'.format(hparams['train_prefix'], hparams['src']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['dev_prefix'], hparams['src']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['test_prefix'], hparams['src']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['train_prefix'], hparams['tgt']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['dev_prefix'], hparams['tgt']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
            '{}.{}'.format(hparams['test_prefix'], hparams['tgt']).replace(preprocessing['train_folder'], '').lstrip(
                '\\/'),
        ]

        # Iterate through files and apply BPE
        for i, file_name in enumerate(bpe_files):

            # Current train vocab
            source = hparams['src'] if preprocessing['joined_vocab'] else file_name.split('.')[-1]

            print("File: {}{}{}".format(colorama.Fore.GREEN, file_name, colorama.Fore.RESET))

            # Output file handler
            # NOTE(review): this handle is never closed inside this function
            out_file = open('{}/{}'.format(preprocessing['train_folder'], file_name), 'w', encoding='utf-8',
                            buffering=131072)

            # Prepare thread variables
            write_thread = None
            written_lines = 0

            # We are going to use multiprocessing for joins, as it's cpu intensive
            with Pool(processes=preprocessing['cpu_count'], initializer=apply_bpe_init,
                      initargs=(joins[source],)) as pool:

                # Progress bar
                progress = tqdm(ascii=True, unit=' lines', total=sum(1 for _ in open(
                    '{}/{}'.format(preprocessing['train_folder'], file_name.replace('.bpe.', '.')), 'r',
                    encoding='utf-8', buffering=131072)))

                # Open input file
                with open('{}/{}'.format(preprocessing['train_folder'], file_name.replace('.bpe.', '.')), 'r',
                          encoding='utf-8', buffering=131072) as in_file:

                    # Iterate every 10k lines
                    for rows in read_lines(in_file, 10000, ''):

                        # Process using multiprocessing
                        rows = pool.map(apply_bpe, rows, 100)

                        # Join running threads from previous loop
                        if write_thread is not None:
                            write_thread.join()
                            # vocab_thread.join()
                            # print('+')
                            progress.update(written_lines)
                            # vocab_thread2.join()

                        # Thread for saving tokenized data to output BPE file
                        write_thread = Thread(target=write_lines, args=(out_file, rows, written_lines == 0))
                        write_thread.start()

                    # Join running threads and update progress bar
                    # NOTE(review): raises AttributeError if the input produced no batches
                    # (write_thread is still None)
                    write_thread.join()
                    progress.update(written_lines)
                    progress.close()

            # Remove unnecessary train file (BPE one will be used by NMT)
            os.remove('{}/{}'.format(preprocessing['train_folder'], file_name.replace('.bpe.', '.')))

    print(colorama.Fore.GREEN + "\nPostprocessing and saving vocabs" + colorama.Fore.RESET)

    # Vocab files to be prepared
    # Joined vocab
    if preprocessing['joined_vocab']:
        vocab_files = [
            '{}.{}'.format(hparams['train_prefix'].replace('train', 'vocab'), hparams['src']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'),
        ]

    # Separated vocabs
    else:
        vocab_files = [
            '{}.{}'.format(hparams['train_prefix'].replace('train', 'vocab'), hparams['src']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'),
            '{}.{}'.format(hparams['train_prefix'].replace('train', 'vocab'), hparams['tgt']).replace(
                preprocessing['train_folder'], '').lstrip('\\/'),
        ]

    for vocab_file_name in vocab_files:
        print("File: {}{}{}".format(colorama.Fore.GREEN, vocab_file_name, colorama.Fore.RESET))

        # Get most common entities
        # (the Counter stored under this key is replaced by a plain list of entities)
        source = vocab_file_name.split('.')[-1]
        data_vocab[source] = [entity for entity, _ in data_vocab[source].most_common()]

        # Write entities to a file
        # First vocab_size entities go to the vocab file, preceded by the three special tokens
        with open('{}/{}'.format(preprocessing['train_folder'], vocab_file_name), 'w', encoding='utf-8',
                  buffering=131072) as vocab_file:
            vocab_file.write("<unk>\n<s>\n</s>\n" + "\n".join(data_vocab[source][:preprocessing['vocab_size']]))
        # Everything beyond vocab_size goes to the matching 'vocab_unused' file
        with open('{}/{}'.format(preprocessing['train_folder'], vocab_file_name.replace('vocab', 'vocab_unused')), 'w',
                  encoding='utf-8', buffering=131072) as vocab_file:
            vocab_file.write("\n".join(data_vocab[source][preprocessing['vocab_size']:]))

    print(colorama.Fore.GREEN + "\nWriting pbtxt file" + colorama.Fore.RESET)

    # Write pbtxt file for metadata for embeddings
    # First path (decoder entry) is vocab_files[0]; second (encoder entry) is vocab_files[1]
    # unless the vocab is joined, in which case both use vocab_files[0]
    with open('{}/{}'.format(os.path.join(train_log_dir), 'projector_config.pbtxt'), 'w', encoding='utf-8',
              buffering=131072) as pbtxt_file:
        pbtxt_file.write(('''embeddings {{\n    tensor_name: 'embeddings/decoder/embedding_decoder'\n    ''' +
                          '''metadata_path: '{}'\n}}\nembeddings {{\n    ''' +
                          '''tensor_name: 'embeddings/encoder/embedding_encoder'\n    metadata_path: '{}'\n}}''').format(
            '{}/{}'.format(preprocessing['train_folder'], vocab_files[0].replace('train', 'vocab')),
            '{}/{}'.format(preprocessing['train_folder'],
                           vocab_files[0 if preprocessing['joined_vocab'] else 1].replace('train', 'vocab'))
        ))

    print(colorama.Fore.GREEN + "\nAll done" + colorama.Fore.RESET)

Esempio n. 43

0

Mostra file

File: models.py Progetto: pyople/pyople

def dict_mp(*args):
    """Build a dict from a flat ``key1, value1, key2, value2, ...`` argument list.

    Used as a dict compatible with multiprocess environment.  Arguments are
    consumed two at a time from a single shared iterator; a trailing key
    without a value is dropped.
    """
    flat = iter(args)
    # zip() pulls alternately from the same iterator: key first, then value.
    return dict(zip(flat, flat))

Esempio n. 44

0

Mostra file

File: nsigtf.py Progetto: toshiemon18/nsgt

def nsigtf_sl(cseq, gd, wins, nn, Ls=None, real=False, reducedform=0, measurefft=False, multithreading=False):
    """Generator: turn each coefficient slice of ``cseq`` into a signal block.

    For every item ``c`` taken from ``cseq`` the coefficients are transformed
    with ``fft``, passed through ``nsigtf_loop`` together with the
    per-window parameters, inverse-transformed and yielded.

    Args:
        cseq: iterable of coefficient slices; each slice must have length
            ``ln`` (checked with an ``assert`` in the main loop).
        gd: sequence of windows; ``gd[0].dtype`` selects the transform dtype.
            (presumably the dual/synthesis windows -- confirm with caller)
        wins: sequence of index ranges, iterated in lockstep with ``gd``.
        nn: length of the frequency buffer and of the inverse transform
            output (passed as ``outn``).
        Ls: if given, each yielded signal is truncated to this length.
        real: use the real inverse transform and only the first
            ``nn//2+1`` bins of the buffer.
        reducedform: when non-zero (and ``real``), the entries for f=0 and
            f=fs/2 are left out of ``gd``/``wins`` and of the expected slice
            length.
        measurefft: forwarded as ``measure`` to the FFT helper constructors.
        multithreading: map the FFT over a slice with ``MP.Pool().map`` when
            ``MP`` is available, otherwise with the plain ``map``.

    Yields:
        One signal array per slice of ``cseq``.
    """
    cseq = iter(cseq)
    dtype = gd[0].dtype

    fft = fftp(measure=measurefft, dtype=dtype)
    ifft = irfftp(measure=measurefft, dtype=dtype) if real else ifftp(measure=measurefft, dtype=dtype)

    if real:
        # expected number of coefficient bands per slice
        ln = len(gd)//2+1-reducedform*2
        # conjugated copy of c with every element after the first in reversed order
        fftsymm = lambda c: np.hstack((c[0],c[-1:0:-1])).conj()
        if reducedform:
            # no coefficients for f=0 and f=fs/2
            symm = lambda fc: chain(fc, imap(fftsymm,fc[::-1]))
            sl = lambda x: chain(x[reducedform:len(gd)//2+1-reducedform],x[len(gd)//2+reducedform:len(gd)+1-reducedform])
        else:
            # mirror all bands except the first and the last one
            symm = lambda fc: chain(fc,imap(fftsymm,fc[-2:0:-1]))
            sl = lambda x: x
    else:
        ln = len(gd)
        symm = lambda fc: fc
        sl = lambda x: x

    # longest window among those actually used; sizes the scratch buffer
    maxLg = max(len(gdii) for gdii in sl(gd))

    # get first slice
    # (its dtype is needed to allocate the output before the main loop)
    c0 = cseq.next()

    fr = np.empty(nn, dtype=c0[0].dtype)  # Allocate output
    temp0 = np.empty(maxLg, dtype=fr.dtype)  # pre-allocation

    if multithreading and MP is not None:
        mmap = MP.Pool().map
    else:
        mmap = map

    # Precompute, once per window, the slices and index ranges handed to nsigtf_loop.
    loopparams = []
    for gdii,win_range in izip(sl(gd), sl(wins)):
        Lg = len(gdii)
        temp = temp0[:Lg]  # view into the shared scratch buffer
        wr1 = win_range[:(Lg)//2]
        wr2 = win_range[-((Lg+1)//2):]
#        wr1,wr2 = win_range
        sl1 = slice(None, (Lg+1)//2)
        sl2 = slice(-(Lg//2), None)
        p = (gdii,wr1,wr2,sl1,sl2,temp)
        loopparams.append(p)

    # main loop over slices
    # (c0 was already consumed above, so it is chained back in front)
    for c in chain((c0,),cseq):
        assert len(c) == ln

        # do transforms on coefficients
        # TODO: for matrixform we could do a FFT on the whole matrix along one axis
        # this could also be nicely parallelized
        fc = mmap(fft, c)
        fc = symm(fc)

        # The overlap-add procedure including multiplication with the synthesis windows
        # NOTE(review): fr comes from np.empty and is reused across slices;
        # presumably nsigtf_loop resets it before accumulating -- confirm.
        fr = nsigtf_loop(loopparams, fr, fc)

        ftr = fr[:nn//2+1] if real else fr

        sig = ifft(ftr, outn=nn)

        sig = sig[:Ls] # Truncate the signal to original length (if given)

        yield sig

Esempio n. 45

0

Mostra file

File: models.py Progetto: pyople/pyople

 def __iter__(self):
     """Iterate over the contents of ``self.needed``."""
     needed = self.needed
     return iter(needed)

Esempio n. 46

0

Mostra file

 def __iter__(self):
     """Iterate over the contents of ``self.indices``."""
     indices = self.indices
     return iter(indices)

Esempio n. 47

0

Mostra file

    viewpoints = viewpoints.view((-1, m, *v_dims))

    # Partition into context and query sets
    context_idx, query_idx = indices[:-1], indices[-1]

    x, v = images[:, context_idx], viewpoints[:, context_idx]
    x_q, v_q = images[:, query_idx], viewpoints[:, query_idx]

    return x, v, x_q, v_q
import random

# Pick a scene to visualise
scene_id = 3

# Load one batch; squeeze(0) drops dimension 0 when it has size 1
x, v = next(iter(loader))
x_, v_ = x.squeeze(0), v.squeeze(0)

# Sample a set of views: the partition below uses all but the last index
# as context and the last index as the query
n_context = 13 + 1
# random.sample accepts a range directly; no need to copy it into a list
indices = random.sample(range(v_.size(1)), n_context)

# Separate into context and query sets
x_c, v_c, x_q, v_q = deterministic_partition(x, v, indices)

# Visualise context and query images
# NOTE(review): this plots the first 15 views of x_[scene_id], not the sampled
# x_c / x_q returned above -- confirm that is intended.
f, axarr = plt.subplots(1, 15, figsize=(20, 7))
for i, ax in enumerate(axarr.flat):
    # Move channel dimension to end
    ax.imshow(x_[scene_id][i].permute(1, 2, 0))

Esempio n. 48

0

Mostra file

File: models.py Progetto: pyople/pyople

 def __iter__(self):
     """Iterate over whatever ``self.keys()`` returns."""
     keys = self.keys()
     return iter(keys)

Esempio n. 49

0

Mostra file

 def default(cls):
     """Return the first item obtained by iterating ``cls``.

     Returns:
         The first item, or ``None`` when iteration yields nothing.
     """
     # The two-argument form of next() supplies the fallback directly,
     # replacing the explicit StopIteration handler.
     return next(iter(cls), None)

Esempio n. 50

0

Mostra file

File: test_torch_color_describer.py Progetto: insop/cs224u

def test_build_dataset(dataset):
    """The first item of the dataset built by the describer has three parts."""
    color_seqs, word_seqs, vocab = dataset
    describer = ContextualColorDescriber(vocab)
    built = describer.build_dataset(color_seqs, word_seqs)
    first_item = next(iter(built))
    assert len(first_item) == 3

Esempio n. 51

0

Mostra file

def test(arg=None):
    """Exercise the ``Pool`` API end to end and assert on the results.

    Covers the empty-input edge cases, ``apply``/``apply_async``, ``map``,
    ``imap``, ``imap_unordered`` and their ``*_async`` variants, timeouts on
    ``get()``, and iteration over results when negative inputs are supplied.
    The pool is terminated and joined at the end.

    Args:
        arg: pass ``"-v"`` to print progress messages; with any other value
            the ``say`` helper is a no-op.
    """
    if arg == "-v":

        def say(*x):
            print(*x)
    else:

        def say(*x):
            pass

    say("Start Pool testing")

    get_tid = lambda: threading.current_thread().ident

    def return42():
        return 42

    def f(x):
        return x * x

    def work(mseconds):
        # Returns the *original* argument as a string; the sign is dropped
        # only for the sleep duration (mseconds / 100 seconds).
        res = str(mseconds)
        if mseconds < 0:
            mseconds = -mseconds
        say("[%d] Start to work for %fms..." % (get_tid(), mseconds * 10))
        time.sleep(mseconds / 100.)
        say("[%d] Work done (%fms)." % (get_tid(), mseconds * 10))
        return res

    ### Test copy/pasted from multiprocessing
    pool = Pool(4)  # start worker threads

    # edge cases
    # (empty inputs must give empty results; an empty args list calls return42())
    assert pool.map(return42, []) == []
    assert pool.apply_async(return42, []).get() == 42
    assert pool.apply(return42, []) == 42
    assert list(pool.imap(return42, iter([]))) == []
    assert list(pool.imap_unordered(return42, iter([]))) == []
    assert pool.map_async(return42, []).get() == []
    assert list(pool.imap_async(return42, iter([])).get()) == []
    assert list(pool.imap_unordered_async(return42, iter([])).get()) == []

    # basic tests
    result = pool.apply_async(f, (10, ))  # evaluate "f(10)" asynchronously
    assert result.get(timeout=1) == 100  # ... unless slow computer
    assert list(pool.map(f, range(10))) == list(map(f, range(10)))
    it = pool.imap(f, range(10))
    # imap results are checked in input order: f(0), f(1), f(2)
    assert next(it) == 0
    assert next(it) == 1
    assert next(it) == 4

    # Test apply_async exceptions
    # (a 3 s sleep cannot finish within the 1 s timeout)
    result = pool.apply_async(time.sleep, (3, ))
    try:
        say(result.get(timeout=1))  # raises `TimeoutError`
    except TimeoutError:
        say("Good. Got expected timeout exception.")
    else:
        assert False, "Expected exception !"
    assert result.get() is None  # sleep() returns None

    def cb(s):
        say("Result ready: %s" % s)

    # Test imap()
    assert list(pool.imap(work, range(10, 3, -1),
                          chunksize=4)) == list(map(str, range(10, 3, -1)))

    # Test imap_unordered()
    # (results are sorted on both sides because their order is not checked)
    assert sorted(pool.imap_unordered(work, range(10, 3, -1))) == sorted(
        map(str, range(10, 3, -1)))

    # Test map_async()
    result = pool.map_async(work, range(10), callback=cb)
    try:
        result.get(timeout=0.01)  # raises `TimeoutError`
    except TimeoutError:
        say("Good. Got expected timeout exception.")
    else:
        assert False, "Expected exception !"
    say(result.get())

    # Test imap_async()
    result = pool.imap_async(work, range(3, 10), callback=cb)
    try:
        result.get(timeout=0.01)  # raises `TimeoutError`
    except TimeoutError:
        say("Good. Got expected timeout exception.")
    else:
        assert False, "Expected exception !"
    # get() is called twice and each returned object is iterated once
    for i in result.get():
        say("Item:", i)
    say("### Loop again:")
    for i in result.get():
        say("Item2:", i)

    # Test imap_unordered_async()
    result = pool.imap_unordered_async(work, range(10, 3, -1), callback=cb)
    try:
        say(result.get(timeout=0.01))  # raises `TimeoutError`
    except TimeoutError:
        say("Good. Got expected timeout exception.")
    else:
        assert False, "Expected exception !"
    for i in result.get():
        say("Item1:", i)
    for i in result.get():
        say("Item2:", i)
    # the same returned object is iterated three times in a row
    r = result.get()
    for i in r:
        say("Item3:", i)
    for i in r:
        say("Item4:", i)
    for i in r:
        say("Item5:", i)

    #
    # The case for the exceptions
    #
    # NOTE(review): work() negates negative inputs before sleeping, so it is
    # not evident from this block which call is expected to raise; the
    # handlers below only tolerate IOError/ValueError if one occurs -- confirm.

    # Exceptions in imap_unordered_async()
    result = pool.imap_unordered_async(work, range(2, -10, -1), callback=cb)
    time.sleep(3)
    try:
        for i in result.get():
            say("Got item:", i)
    except (IOError, ValueError):
        say("Good. Got expected exception")

    # Exceptions in imap_async()
    result = pool.imap_async(work, range(2, -10, -1), callback=cb)
    time.sleep(3)
    try:
        for i in result.get():
            say("Got item:", i)
    except (IOError, ValueError):
        say("Good. Got expected exception")

    # Stop the test: need to stop the pool !!!
    pool.terminate()
    pool.join()

Esempio n. 52

0

Mostra file

 def __iter__(self):
     """Return an iterator that yields nothing."""
     nothing = ()
     return iter(nothing)

Esempio n. 53

0

Mostra file

File: test_translations.py Progetto: yangxhcaf/DataPlotly

 def setUp(self):
     """Runs before each test.

     Removes the ``LANG`` variable from ``os.environ`` when it is set; does
     nothing otherwise.
     """
     # pop() with a default replaces the linear scan over an iterator of
     # keys and the direct __delitem__ call with one mapping operation.
     os.environ.pop('LANG', None)

Esempio n. 54

0

Mostra file

 def __iter__(self):
     """Iterate over the contents of ``self.all``."""
     everything = self.all
     return iter(everything)

Esempio n. 55

0

Mostra file

 def __iter__(self):
     """Iterate over this object after converting it with ``np.asarray``."""
     as_array = np.asarray(self)
     return iter(as_array)

Esempio n. 56

0

Mostra file

File: test_translations.py Progetto: yangxhcaf/DataPlotly

 def tearDown(self):
     """Runs after each test.

     Removes the ``LANG`` variable from ``os.environ`` when it is set; does
     nothing otherwise.
     """
     # pop() with a default replaces the linear scan over an iterator of
     # keys and the direct __delitem__ call with one mapping operation.
     os.environ.pop('LANG', None)

Esempio n. 57

0

Mostra file

File: test_artXrealXGAN_SIDDXnoise_flow.py Progetto: marcelomata/synt_noise_GANs

# Build the test data flow (the trained weights are loaded further below)
datagenTest = ImageDataGenerator()
datagenTest.config['random_crop_size'] = image_size
datagenTest.set_pipeline([random_crop,standardize,compute_fft2])
flow_test = datagenTest.flow_from_directory(test_clean_dir,batch_size=50,color_mode='rgbfft',target_size=image_size)
flow_test.setCurrentISO(SELECTED_ISO, test_noisy_dir)
flow_test.setGANdir(test_GAN_dir)


# Override the batch size given to flow_from_directory above.
flow_test.batch_size = 3  # previously: batchsizes_for_isos[str(ISO_LEVEL)]
total_batch_size = 500

# Buffers for the whole evaluation set: 6 channels per image, 7 label columns.
x = np.zeros((total_batch_size,image_size[0],image_size[1],6))
y_true = np.zeros((total_batch_size,7))
iter_flow = iter(flow_test)
# NOTE(review): 500 // 3 == 166 iterations fill rows 0..497 only, so the last
# two rows of x and y_true stay all-zero and are still passed to predict()
# and argmax() below -- confirm this is acceptable.
for i in range(total_batch_size//flow_test.batch_size):
    # Restart the flow whenever the sample offset is a multiple of the number
    # of files; this is also true for i == 0, so it runs before the first batch.
    if (((i*flow_test.batch_size)%len(flow_test.filenames))==0):
        flow_test.on_epoch_end()
        iter_flow = iter(flow_test)
    x_cur,y_cur = next(iter_flow)
    # assumes every batch holds exactly flow_test.batch_size samples -- TODO confirm
    x[i*flow_test.batch_size:(i+1)*flow_test.batch_size] = x_cur
    y_true[i*flow_test.batch_size:(i+1)*flow_test.batch_size]=y_cur

# Build the model and load the weights saved for the selected ISO
model = create_model(image_size, num_classes=num_classes)
model.load_weights('trained_models/{}_SIDD_several_classes_weights.h5'.format(SELECTED_ISO))

y_pred = model.predict(x)

# Reduce the per-class rows to class indices
y_true = y_true.argmax(axis=1)
y_pred = y_pred.argmax(axis=1)

Esempio n. 58

0

Mostra file

File: train.py Progetto: CognitiveHorizons/curriculum-TableQA

def main():
    """Training entry point: set up, train epoch by epoch, validate and save.

    Parses the command-line arguments, builds the task's model and config,
    optionally wraps the model for multi-GPU training, resumes from
    ``model.ckpt.bin`` in the output directory when that file exists, and
    then loops over epochs. Each epoch runs the training steps, periodically
    saves a checkpoint, saves the model weights, and validates on the dev set.

    Raises:
        ValueError: if ``gradient_accumulation_steps`` is smaller than 1.
        NotImplementedError: if ``args.no_init`` is set.
    """
    args = parse_train_arg()
    task = task_dict[args.task]

    init_distributed_mode(args)
    logger = init_logger(args)

    if hasattr(args, 'base_model_name'):
        logger.warning(
            'Argument base_model_name is deprecated! Use `--table-bert-extra-config` instead!'
        )

    init_signal_handler()

    train_data_dir = args.data_dir / 'train'
    dev_data_dir = args.data_dir / 'dev'
    table_bert_config = task['config'].from_file(
        args.data_dir / 'config.json', **args.table_bert_extra_config)

    # Only the master process writes the run configuration to disk.
    if args.is_master:
        args.output_dir.mkdir(exist_ok=True, parents=True)
        with (args.output_dir / 'train_config.json').open('w') as f:
            json.dump(vars(args), f, indent=2, sort_keys=True, default=str)

        logger.info(f'Table Bert Config: {table_bert_config.to_log_string()}')

        # copy the table bert config file to the working directory
        # shutil.copy(args.data_dir / 'config.json', args.output_dir / 'tb_config.json')
        # save table BERT config
        table_bert_config.save(args.output_dir / 'tb_config.json')

    assert args.data_dir.is_dir(), \
        "--data_dir should point to the folder of files made by pregenerate_training_data.py!"

    if args.cpu:
        device = torch.device('cpu')
    else:
        device = torch.device(f'cuda:{torch.cuda.current_device()}')

    logger.info(
        "device: {} gpu_id: {}, distributed training: {}, 16-bits training: {}"
        .format(device, args.local_rank, bool(args.multi_gpu), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    # The loader batch size is the full train_batch_size; the division by the
    # accumulation steps is disabled.
    real_batch_size = args.train_batch_size  # // args.gradient_accumulation_steps

    # Seed every RNG in use from the same value.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if not args.cpu:
        torch.cuda.manual_seed_all(args.seed)

    if args.output_dir.is_dir() and list(args.output_dir.iterdir()):
        logger.warning(
            f"Output directory ({args.output_dir}) already exists and is not empty!"
        )
    args.output_dir.mkdir(parents=True, exist_ok=True)

    # Prepare model
    # Ranks other than 0 wait here until rank 0 has built its model and
    # reached the matching barrier below (presumably so one-time setup
    # happens only once -- confirm).
    if args.multi_gpu and args.global_rank != 0:
        torch.distributed.barrier()

    if args.no_init:
        raise NotImplementedError
    else:
        model = task['model'](table_bert_config)

    if args.multi_gpu and args.global_rank == 0:
        torch.distributed.barrier()

    if args.fp16:
        model = model.half()

    model = model.to(device)
    if args.multi_gpu:
        if args.ddp_backend == 'pytorch':
            model = nn.parallel.DistributedDataParallel(
                model,
                find_unused_parameters=True,
                device_ids=[args.local_rank],
                output_device=args.local_rank,
                broadcast_buffers=False)
        else:
            import apex
            model = apex.parallel.DistributedDataParallel(model,
                                                          delay_allreduce=True)

        # keep a handle on the unwrapped model (tokenizer access, state_dict)
        model_ptr = model.module
    else:
        model_ptr = model

    # set up update parameters for LR scheduler
    dataset_cls = task['dataset']

    train_set_info = dataset_cls.get_dataset_info(train_data_dir,
                                                  args.max_epoch)
    total_num_updates = train_set_info[
        'total_size'] // args.train_batch_size // args.world_size // args.gradient_accumulation_steps
    # the dataset info is authoritative for the number of epochs
    args.max_epoch = train_set_info['max_epoch']
    logger.info(
        f'Train data size: {train_set_info["total_size"]} for {args.max_epoch} epochs, total num. updates: {total_num_updates}'
    )

    args.total_num_update = total_num_updates
    # warm up for the first 10% of all updates
    args.warmup_updates = int(total_num_updates * 0.1)

    trainer = Trainer(model, args)

    checkpoint_file = args.output_dir / 'model.ckpt.bin'
    is_resumed = False
    # trainer.save_checkpoint(checkpoint_file)
    # Resume from an existing checkpoint in the output directory, if any.
    if checkpoint_file.exists():
        logger.info(f'Logging checkpoint file {checkpoint_file}')
        is_resumed = True
        trainer.load_checkpoint(checkpoint_file)

    model.train()

    # we also partition the dev set for every local process
    logger.info('Loading dev set...')
    sys.stdout.flush()
    dev_set = dataset_cls(epoch=0,
                          training_path=dev_data_dir,
                          tokenizer=model_ptr.tokenizer,
                          config=table_bert_config,
                          multi_gpu=args.multi_gpu,
                          debug=args.debug_dataset)

    logger.info("***** Running training *****")
    logger.info(f"  Current config: {args}")

    if trainer.num_updates > 0:
        logger.info(f'Resume training at epoch {trainer.epoch}, '
                    f'epoch step {trainer.in_epoch_step}, '
                    f'global step {trainer.num_updates}')

    start_epoch = trainer.epoch
    for epoch in range(start_epoch, args.max_epoch):  # start_epoch inclusive, max_epoch exclusive
        model.train()

        # Build this epoch's dataset and loader under a forked RNG state
        # seeded from the epoch number; the global RNG state is restored on
        # leaving the block.
        with torch.random.fork_rng(
                devices=None if args.cpu else [device.index]):
            torch.random.manual_seed(131 + epoch)

            epoch_dataset = dataset_cls(epoch=trainer.epoch,
                                        training_path=train_data_dir,
                                        config=table_bert_config,
                                        tokenizer=model_ptr.tokenizer,
                                        multi_gpu=args.multi_gpu,
                                        debug=args.debug_dataset)
            train_sampler = RandomSampler(epoch_dataset)
            train_dataloader = DataLoader(epoch_dataset,
                                          sampler=train_sampler,
                                          batch_size=real_batch_size,
                                          num_workers=0,
                                          collate_fn=epoch_dataset.collate)

        # Group the loader's batches by the gradient accumulation step count.
        samples_iter = GroupedIterator(iter(train_dataloader),
                                       args.gradient_accumulation_steps)
        # presumably skips the steps already done in a resumed epoch -- confirm
        trainer.resume_batch_loader(samples_iter)

        with tqdm(total=len(samples_iter),
                  initial=trainer.in_epoch_step,
                  desc=f"Epoch {epoch}",
                  file=sys.stdout,
                  disable=not args.is_master,
                  miniters=100) as pbar:

            for samples in samples_iter:
                logging_output = trainer.train_step(samples)

                pbar.update(1)
                pbar.set_postfix_str(', '.join(
                    f"{k}: {v:.4f}" for k, v in logging_output.items()))

                # Periodic checkpoint, written by the master process only.
                if (0 < trainer.num_updates and trainer.num_updates %
                        args.save_checkpoint_every_niter == 0
                        and args.is_master):
                    # Save model checkpoint
                    logger.info("** ** * Saving checkpoint file ** ** * ")
                    trainer.save_checkpoint(checkpoint_file)

            logger.info(f'Epoch {epoch} finished.')

            if args.is_master:
                # Save a trained table_bert
                logger.info("** ** * Saving fine-tuned table_bert ** ** * ")
                model_to_save = model_ptr  # Only save the table_bert it-self
                output_model_file = args.output_dir / f"pytorch_model_epoch{epoch:02d}.bin"
                torch.save(model_to_save.state_dict(), str(output_model_file))

            # perform validation
            logger.info("** ** * Perform validation ** ** * ")
            dev_results = trainer.validate(dev_set)

            if args.is_master:
                logger.info('** ** * Validation Results ** ** * ')
                logger.info(f'Epoch {epoch} Validation Results: {dev_results}')

            # flush logging information to disk
            sys.stderr.flush()

        trainer.next_epoch()

Esempio n. 59

0

Mostra file

File: 5_iterable.py Progetto: scutpaul/skill

# Check whether each object is iterable
print("list_is_iterable:",isinstance(list_,Iterable))
print("dict_is_iterable:",isinstance(dict_,Iterable))
print("str_is_iterable:",isinstance(str_,Iterable))
print("list_generator_is_iteratable",isinstance((x for x in range(10)), Iterable))

# Check whether each object is an iterator
print("是否为迭代器")
# The ABC aliases in ``collections`` were removed in Python 3.10, so the
# abstract base class must be imported from ``collections.abc``.
from collections.abc import Iterator
print("list_is_iterator:",isinstance(list_,Iterator))
print("dict_is_iterator:",isinstance(dict_,Iterator))
print("str_is_iterator:",isinstance(str_,Iterator))
print("list_generator_is_iterator",isinstance((x for x in range(10)), Iterator))

# Use iter() to turn each iterable into an iterator
print("使用iter使之成为迭代器")
print("list_iter_is_iterator:",isinstance(iter(list_),Iterator))
print("dict_iter_is_iterator:",isinstance(iter(dict_),Iterator))
print("str_iter_is_iterator:",isinstance(iter(str_),Iterator))

# The bare string below is an inert note (written in Chinese): an Iterator
# does not store its data. It represents a data stream that next() advances
# until StopIteration is raised; the stream can be seen as an ordered
# sequence whose length is not known in advance, so values are computed
# lazily, only when the next one is requested.
'''
iterator并不存储

这是因为Python的Iterator对象表示的是一个数据流，
Iterator对象可以被next()函数调用并不断返回下一个数据，
直到没有数据时抛出StopIteration错误。
可以把这个数据流看做是一个有序序列，但我们却不能提前知道序列的长度，
只能不断通过next()函数实现按需计算下一个数据，
所以Iterator的计算是惰性的，只有在需要返回下一个数据时它才会计算。
'''
# Modifying a list while iterating over it
list_test = [1,2,3,4,5]

Esempio n. 60

0

Mostra file

 def __iter__(self) -> Iterator[PackFile]:
     """Iterate over the values held in ``self._files``."""
     files = self._files
     return iter(files.values())