Example #1
0
  def test_input_stream(self):
    # A 'repo://' url should resolve to an input stream whose first element
    # yields the record 'Hi mom!'.

    # input_stream_for is exercised with a FakeTask on the task stack
    task.push(FakeTask())
    try:
      input_stream = datasources.input_stream_for(None, 0, 'repo://dir1/doc1.txt', None)
      eq_('Hi mom!', input_stream[0].next())
    finally:
      # always unwind the task stack, even when the lookup or the assertion
      # fails, so a failing test cannot leak its FakeTask into later tests
      task.pop()
Example #2
0
    def test_input_stream(self):
        # A 'repo://' url should resolve to an input stream whose first
        # element yields the record 'Hi mom!'.

        # input_stream_for is exercised with a FakeTask on the task stack
        task.push(FakeTask())
        try:
            input_stream = datasources.input_stream_for(None, 0,
                                                        'repo://dir1/doc1.txt',
                                                        None)
            eq_('Hi mom!', input_stream[0].next())
        finally:
            # always unwind the task stack, even when the lookup or the
            # assertion fails, so a failing test cannot leak its FakeTask
            # into later tests
            task.pop()
Example #3
0
def map_input_stream(stream, size, url, params):
  """
  Looks up an input stream if one is registered, if not
  falls back to disco's defaults.

  All four arguments are handed unchanged to
  ``datasources.input_stream_for`` and, when that returns a falsy value,
  to ``disco.func.map_input_stream``.

  If the result is a tuple it is unpacked as ``(input_stream, size, url)``
  when it has three items and as ``(input_stream, url)`` otherwise.  When
  ``params`` has a ``content_type`` attribute, the stream is additionally
  wrapped by the reader that ``datasources.reader_for_mimetype`` returns
  for that content type.

  Returns the resulting input stream.
  """

  # achtung! warning! when this function is called by the disco
  # node the globals in this module will no longer be visible
  # hence why we access everything through datasources
  import disco.func
  from triv.io import datasources
  from triv.io.task import task

  # Note: Task is a global set by disco, but not necessarily seen by other
  # objects; we push it onto the context stack which will allow it to be
  # imported by our modules that need it
  try:
    task.push(Task)
  except NameError:
    # it's a test
    pass
  input_stream = datasources.input_stream_for(stream, size, url, params)

  if not input_stream:
    # we don't handle the given url, see if vanilla disco modules can...

    try:
      # this is normally cleared when we're done iterating
      task.pop()
    except IndexError:
      # nothing was pushed above (the NameError path), so nothing to clear
      pass

    input_stream = disco.func.map_input_stream(stream, size, url, params)

  # same code in classic/worker...
  if isinstance(input_stream, tuple):
    if len(input_stream) == 3:
      input_stream, size, url = input_stream
    else:
      input_stream, url = input_stream

  if hasattr(params, 'content_type'):
    reader = datasources.reader_for_mimetype(params.content_type)
    input_stream = reader(input_stream, size, url, params)

  # parenthesised single-argument form prints the same text on Python 2
  # and is also valid Python 3 syntax
  print("using input stream {}".format(input_stream))
  return input_stream
Example #4
0
def error_proof(stream, size, url, params):
  """
  Yields the records of ``stream``, logging a read error instead of raising it.

  If iterating ``stream`` raises an ``Exception``, a message with the error,
  the number of records yielded so far, the stream and ``url`` is printed and
  the generator ends there; the error is not re-raised and no further records
  are read.  ``size`` and ``params`` are accepted but not used.

  When the generator finishes (or is closed), ``task.pop()`` is called; an
  ``IndexError`` from that call is ignored.
  """
  try:
    count = 0
    try:
      for record in stream:
        yield record
        # counts only records that were handed to the consumer
        count += 1
    except Exception as e:
      # swallow the error so the consumer sees a normal end of stream
      print("Error {} encountered at record {} in {} {}".format(
        e,
        count,
        stream,
        url
      ))
  finally:
    try:
      task.pop()
    except IndexError:
      # the task stack was already empty
      pass