Example #1
import os
from operator import itemgetter

from nupic.data.file_record_stream import FileRecordStream

# title() is a debug-tracing helper defined elsewhere in the original module


def _sortChunk(records, key, chunkIndex, fields):
  """Sort an in-memory chunk of records

  records - a list of records read from the original dataset
  key - a list of field indices to sort the records by
  chunkIndex - the index of the current chunk
  fields - the field descriptors to use when writing the chunk file

  The records contain only the fields requested by the user.

  _sortChunk() will write the sorted records to a file
  named "chunk_<chunk index>.csv" (chunk_0.csv, chunk_1.csv, ...).
  """
  title(additional='(key=%s, chunkIndex=%d)' % (str(key), chunkIndex))

  assert len(records) > 0

  # Sort the current records in place
  records.sort(key=itemgetter(*key))

  # Write the sorted chunk to its own file
  if chunkIndex is not None:
    filename = 'chunk_%d.csv' % chunkIndex
    with FileRecordStream(filename, write=True, fields=fields) as o:
      for r in records:
        o.appendRecord(r)

    assert os.path.getsize(filename) > 0

  return records
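
For context, the same chunk-sort step can be sketched with only the Python standard library. The snippet below is an illustration, not part of the original module: sort_chunk is a hypothetical helper, records are assumed to be plain tuples, and raw CSV writing stands in for FileRecordStream.

import csv
from operator import itemgetter

def sort_chunk(records, key, chunk_index):
  # Hypothetical stand-in for _sortChunk(): sort one in-memory chunk
  # and spill it to disk as chunk_<index>.csv
  records.sort(key=itemgetter(*key))
  filename = 'chunk_%d.csv' % chunk_index
  with open(filename, 'w', newline='') as f:
    csv.writer(f).writerows(records)
  return filename

# Sort by the second field, breaking ties on the first
chunk = [('b', 2), ('a', 1), ('c', 1)]
print(sort_chunk(chunk, key=[1, 0], chunk_index=0))  # -> chunk_0.csv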
def _step(self):
  """Run the network for one iteration."""
  title()
  # Run a single iteration and then pause again
  self.runCount = 1
  self.experiment.pause = False
  self._runExperiment()
  self.pause = True

def onPhaseTeardown(self, exp):
  title()

  index = exp.position.phase
  # The workflow is done when the last phase finishes
  if index == len(exp.workflow) - 1:
    self.done = True
def onIter(self, exp, i):
  """Called on every iteration; decides whether the experiment should pause."""
  title(additional='(), self.pause = ' + str(self.pause))
  self.iteration += 1

  # Check if the pause button was clicked
  if self.pause:
    exp.pause = True
  elif self.runCount is not None:
    # Count down the remaining iterations and pause when they run out
    self.runCount -= 1
    if self.runCount == 0:
      exp.pause = True

  runtimelistener.listenersEnabled = exp.pause
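
To make the run-count logic above concrete, here is a self-contained simulation of just the countdown branch of onIter(); FakeExp and count_down are hypothetical names used only for this illustration.

class FakeExp(object):
  pause = False

def count_down(run_count, max_iters=10):
  # Mirrors the elif branch of onIter(): decrement runCount on each
  # iteration and request a pause when it reaches zero
  exp = FakeExp()
  for i in range(max_iters):
    run_count -= 1
    if run_count == 0:
      exp.pause = True
    if exp.pause:
      return i + 1  # number of iterations executed before pausing
  return max_iters

print(count_down(3))  # -> 3: the experiment pauses after three iterations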
def onPhaseSetup(self, exp):
  title()
  self.iteration = 0
  self.phase = self._getPhase(exp)
  phase = self.phase[1]
  self.iterationCount = phase[0]['iterationCount'] if len(phase) > 0 else 0

  # While single-stepping, pause at a phase boundary only every other
  # time this callback fires
  if self.pauseAtNextStep and self.pauseAtPhaseSetup:
    exp.pause = True
    self.pause = True
    self.pauseAtPhaseSetup = False
  else:
    self.pauseAtPhaseSetup = True
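
The pauseAtPhaseSetup flag above makes single-stepping pause only on every other phase boundary. A minimal trace of that toggle, with all experiment machinery stripped away (phase_setup_trace is a hypothetical name, and the flag is assumed to start out False):

def phase_setup_trace(pause_at_next_step, phases=4):
  # Replays only the toggle at the end of onPhaseSetup()
  pause_at_phase_setup = False
  trace = []
  for _ in range(phases):
    if pause_at_next_step and pause_at_phase_setup:
      trace.append('pause')
      pause_at_phase_setup = False
    else:
      trace.append('run')
      pause_at_phase_setup = True
  return trace

print(phase_setup_trace(True))   # -> ['run', 'pause', 'run', 'pause']
print(phase_setup_trace(False))  # -> ['run', 'run', 'run', 'run']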
Example #10
def _mergeFiles(key, chunkCount, outputFile, fields):
  """Merge sorted chunk files into a sorted output file

  key - a list of field indices to sort the records by
  chunkCount - the number of available chunk files
  outputFile - the name of the sorted output file
  fields - the field descriptors to use when writing the output file

  _mergeFiles() performs a k-way merge: it repeatedly picks the smallest
  head record across all chunk files and appends it to the output file.
  """
  title()

  # Open all chunk files once, keeping the full list for cleanup
  chunkFiles = [FileRecordStream('chunk_%d.csv' % i) for i in range(chunkCount)]
  files = list(chunkFiles)

  # Open the output file
  with FileRecordStream(outputFile, write=True, fields=fields) as o:
    # Read the head record of every chunk file
    records = [f.getNextRecord() for f in files]

    # This loop will run until all files are exhausted
    while not all(r is None for r in records):
      # Drop exhausted files (their head record is None)
      indices = [i for i, r in enumerate(records) if r is not None]
      records = [records[i] for i in indices]
      files = [files[i] for i in indices]

      # Find the smallest head record
      r = min(records, key=itemgetter(*key))
      # Write it to the output file
      o.appendRecord(r)

      # Find the index of the file that produced the current record
      index = records.index(r)
      # Advance that file to its next record
      records[index] = files[index].getNextRecord()

  # Cleanup chunk files
  for i, f in enumerate(chunkFiles):
    f.close()
    os.remove('chunk_%d.csv' % i)
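
Because the chunk files are already sorted, the same k-way merge can be expressed with heapq.merge from the standard library, which keeps only one head record per file in memory. The sketch below uses plain csv readers instead of FileRecordStream and assumes the chunk_<i>.csv naming from above; note that csv yields strings, so comparisons are lexicographic.

import csv
import heapq
import os
from operator import itemgetter

def merge_chunks(key, chunk_count, output_file):
  # Open one reader per sorted chunk file
  handles = [open('chunk_%d.csv' % i, newline='') for i in range(chunk_count)]
  readers = [csv.reader(h) for h in handles]
  with open(output_file, 'w', newline='') as out:
    writer = csv.writer(out)
    # heapq.merge lazily yields the globally smallest record
    for record in heapq.merge(*readers, key=itemgetter(*key)):
      writer.writerow(record)
  # Cleanup chunk files
  for i, h in enumerate(handles):
    h.close()
    os.remove('chunk_%d.csv' % i)

# E.g., merging the single chunk produced by the sort_chunk sketch above
merge_chunks(key=[1, 0], chunk_count=1, output_file='sorted.csv')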