Esempio n. 1
0
File: util.py Progetto: darkua/disco
def read_index(dir):
    """Yield the whitespace-separated fields of each line of the index at *dir*.

    The index is fetched through the proxy; a ``.gz`` suffix on *dir*
    selects transparent gzip decompression.
    """
    from disco.comm import open_url
    stream = open_url(proxy_url(dir))
    if dir.endswith(".gz"):
        stream = gzip.GzipFile(fileobj=stream)
    for entry in stream:
        yield entry.split()
Esempio n. 2
0
File: util.py Progetto: yuj/disco
def read_index(dir):
    """Yield the split fields of every index line, decoded from bytes to str."""
    from disco.comm import open_url
    fd = open_url(proxy_url(dir, to_master=False))
    # transparently decompress gzipped indices
    if dir.endswith(".gz"):
        fd = gzip.GzipFile(fileobj=fd)
    for raw in fd:
        yield bytes_to_str(raw).split()
Esempio n. 3
0
File: util.py Progetto: dangra/disco
def read_index(dir):
    """Iterate over the index found at *dir*, yielding each line's fields."""
    from disco.comm import open_url
    raw = open_url(proxy_url(dir, to_master=False))
    # a ".gz" suffix marks a compressed index
    source = gzip.GzipFile(fileobj=raw) if dir.endswith(".gz") else raw
    for line in source:
        yield bytes_to_str(line).split()
Esempio n. 4
0
def read_index(dir):
    """Yield the fields of every line of the index located at *dir*."""
    from disco.comm import open_url
    handle = open_url(proxy_url(dir))
    if dir.endswith(".gz"):
        # wrap in a gzip reader when the index is compressed
        handle = gzip.GzipFile(fileobj=handle)
    for record in handle:
        yield record.split()
Esempio n. 5
0
def open(url, task=None):
    """Resolve *url* (relative to *task*'s node when one is given) and open it."""
    # With a task we can resolve node-local addresses; otherwise only the
    # localhost hint is passed.
    kwargs = {'localhost': None}
    if task:
        kwargs = {'localhost': task.host,
                  'disco_port': task.disco_port,
                  'disco_data': task.disco_data,
                  'ddfs_data': task.ddfs_data}
    scheme, netloc, path = util.urlsplit(url, **kwargs)
    return comm.open_url(util.urljoin((scheme, netloc, path)))
Esempio n. 6
0
def read_index(dir):
    """Yield ``(label, url, size)`` triples from the index at *dir*.

    *dir* may be a sequence of replica dirs; only the first is used.
    """
    if isiterable(dir):
        dir = dir[0]
    from disco.comm import open_url
    stream = open_url(proxy_url(dir, to_master=False))
    if dir.endswith(".gz"):
        stream = gzip.GzipFile(fileobj=stream)
    for raw in stream:
        label, url, size = bytes_to_str(raw).split()
        yield int(label), url, int(size)
Esempio n. 7
0
def read_index(dir):
    """Read an index, yielding one ``(int label, url, int size)`` per line."""
    # Replicated dirs arrive as a sequence; pick the first replica.
    dir = dir[0] if isiterable(dir) else dir
    from disco.comm import open_url
    fd = open_url(proxy_url(dir, to_master=False))
    fd = gzip.GzipFile(fileobj=fd) if dir.endswith(".gz") else fd
    for raw in fd:
        label, url, size = bytes_to_str(raw).split()
        yield int(label), url, int(size)
Esempio n. 8
0
def open(url, task=None):
    """Open *url* via comm, resolving node-local addresses using *task*."""
    if not task:
        parts = util.urlsplit(url, localhost=None)
    else:
        # the task tells us how to map node-local paths and ports
        parts = util.urlsplit(url,
                              localhost=task.host,
                              disco_port=task.disco_port,
                              disco_data=task.disco_data,
                              ddfs_data=task.ddfs_data)
    scheme, netloc, path = parts
    return comm.open_url(util.urljoin((scheme, netloc, path)))
Esempio n. 9
0
def input_stream(fd, size, url, params):
    """Opens the url locally on the node.

    Resolves an ``s3://bucket/key`` *url* to a time-limited signed URL and
    opens it.  Returns ``(stream, size, url)``; an absent or zero-length
    key yields an empty stream.
    """
    assert url.startswith('s3://')
    bucketname, keyname = url[5:].split('/', 1)
    access_key = params.get('aws_access_key_id')
    secret_key = params.get('aws_secret_access_key')
    s3 = connect_s3(access_key, secret_key)
    bucket = s3.get_bucket(bucketname, validate=False)
    key = bucket.get_key(keyname)
    # get_key() returns None for a missing key; treat that like a
    # zero-length object instead of crashing on key.size (AttributeError).
    if key is not None and key.size:
        url = key.generate_url(24*3600)  # signed URL valid for 24 hours
        return open_url(url), key.size, url
    else:
        return StringIO(), 0, url
Esempio n. 10
0
 def concat_input(cls, task, output_label, replicas):
     """Concatenate the first replica of every input into one output file.

     Returns ``(path, size)`` of the finished output.
     """
     out = AtomicFile(task.output_path(output_label))
     CHUNK = 1024 * 1024
     for reps in replicas:
         # Only the first replica is read, since a single-replica set is
         # the most common case.
         # TODO: handle falling back to alternative replicas.
         src = open_url(reps[0])
         while True:
             chunk = src.read(CHUNK)
             if not chunk:
                 break
             out.write(chunk)
         src.close()
     out.close()
     return out.path, out.size()
Esempio n. 11
0
 def concat_input(cls, task, output_label, replicas):
     """Copy each input's first replica, back to back, into a new output.

     Returns the output's ``(path, size)``.
     """
     result = AtomicFile(task.output_path(output_label))
     chunk_size = 1024 * 1024
     for reps in replicas:
         # A set of one replica is the most common case, so only reps[0]
         # is used for now.
         # TODO: handle falling back to alternative replicas.
         source = open_url(reps[0])
         data = source.read(chunk_size)
         while data:
             result.write(data)
             data = source.read(chunk_size)
         source.close()
     result.close()
     return result.path, result.size()
Esempio n. 12
0
def input_stream(fd, size, url, params):
    """Opens the url locally on the node."""
    from disco.comm import open_url
    stream = open_url(url)
    return stream
Esempio n. 13
0
def open(url, task=None):
    """Open an HDFS *url* through the namenode's WebHDFS REST API."""
    _hdfs_scheme, address = schemesplit(url)
    namenode_port, rest = schemesplit(address)
    # Build the equivalent WebHDFS OPEN request for the namenode.
    webhdfs_url = ''.join(['http://', namenode_port, '/webhdfs/v1/', rest, '?op=OPEN'])
    return comm.open_url(webhdfs_url)
Esempio n. 14
0
def open(url, task=None):
    """Open *url* directly via ``comm.open_url``; *task* is unused."""
    stream = comm.open_url(url)
    return stream
def open(url, task=None):
    """Translate an HDFS *url* into a WebHDFS OPEN request and open it."""
    _hdfs_scheme, address = schemesplit(url)
    namenode_port, rest = schemesplit(address)
    # same URL as manual concatenation, built with %-formatting
    return comm.open_url('http://%s/webhdfs/v1/%s?op=OPEN' % (namenode_port, rest))
Esempio n. 16
0
def open(url, task=None):
    """Pass *url* straight through to ``comm.open_url``; *task* is ignored."""
    result = comm.open_url(url)
    return result