Example #1
0
def _get_downloaded_file(filename):
    """ given a test filename, return the file_meta dictionary needed by
      the workflow. Used for tests.
    """
    if filename:
        from util import get_test_file
        csv_file = get_test_file(filename, 'scheduler')

        magic_type = util.MS.file(csv_file)
        return {
            'out_file': csv_file,
            'file_name': filename,
            'file_size': os.path.getsize(csv_file),
            'content_type': 'text/csv',
            'md5sum': md5_for_file(csv_file),
            'magic_type': util.magic_to_mime(magic_type),
        }

    print
    print '*' * 80
    print "WARNING"
    print '*' * 80
    print "When testing the workflow, you should set what to do with " \
          "each source you're dealing with. Check " \
          "webui.scheduler.test_helpers.py"
    print '*' * 80
    print
    return {}
Example #2
0
def _get_downloaded_file(filename):
    """ given a test filename, return the file_meta dictionary needed by
      the workflow. Used for tests.
    """
    if filename:
        from util import get_test_file
        csv_file = get_test_file(
            filename, 'scheduler'
        )

        magic_type = util.MS.file(csv_file)
        return {
            'out_file': csv_file,
            'file_name': filename,
            'file_size': os.path.getsize(csv_file),
            'content_type': 'text/csv',
            'md5sum': md5_for_file(csv_file),
            'magic_type': util.magic_to_mime(magic_type),
        }

    print
    print '*' * 80
    print "WARNING"
    print '*' * 80
    print "When testing the workflow, you should set what to do with " \
          "each source you're dealing with. Check " \
          "webui.scheduler.test_helpers.py"
    print '*' * 80
    print
    return {}
Example #3
0
def _download_url(url, save_dir):
    """Download a URL into a file under ``save_dir`` and return its metadata.

    The metadata dictionary is built from the response headers by
    ``__file_meta_from_headers`` and then extended with ``out_file`` (path of
    the saved file), ``magic_type`` and ``md5sum``. If the headers reported a
    ``file_size`` of -1, it is replaced by the size of the saved file.

    :param url: the URL to download.
    :param save_dir: directory the file is saved into; created if needed.
    :return: the file metadata dictionary.
    :raises ValueError: if no file name could be derived for the download.
    """
    loggy = local.logger

    request = urllib2.Request(url)
    opener = urllib2.build_opener(CustomRedirectHandler())
    url_handler = opener.open(request)

    # Everything that uses the open response lives in this try block so the
    # connection is released even when the download fails half way.
    try:
        file_meta = __file_meta_from_headers(
            url,
            url_handler.headers,
            getattr(url_handler, 'redirect_headers', None)
        )
        file_name = file_meta['file_name']

        # do not remove directory here, download may be in parallel with
        # other tasks using the same URL
        __mkdir_p(save_dir)
        if file_name and file_name.endswith('/'):
            file_name = file_name[:-1]
        if not file_name:
            # Fail with a clear message instead of an IndexError or of
            # trying to open the directory itself for writing.
            raise ValueError(
                "Cannot derive a file name for URL %r" % (url,)
            )

        # NOTE(review): file_name is derived from the response headers/URL;
        # if __file_meta_from_headers does not already strip path segments
        # such as '..', it should be sanitised before use -- confirm.
        out_file = save_dir + '/' + file_name

        total_size = file_meta['file_size']
        # A non-positive size (0, or the -1 handled at the end of this
        # function) means the total is unknown: no percentage is reported.
        size_known = total_size > 0
        loggy.info(
            "Downloading: %s KBytes: %s",
            file_name,
            total_size // 1024 if size_known else 0
        )

        file_size_dl = 0
        block_sz = 8192
        emit_status_count = 0
        with open(out_file, 'wb') as output:
            while True:
                data_buffer = url_handler.read(block_sz)
                if not data_buffer:
                    break
                file_size_dl += len(data_buffer)
                output.write(data_buffer)
                # Only report progress once every 101 blocks to keep the
                # log readable.
                if emit_status_count >= 100:
                    if size_known:
                        loggy.info("%10d [%3.2f%%]", file_size_dl,
                                   file_size_dl * 100. / total_size)
                    else:
                        loggy.info("%10d", file_size_dl)
                    emit_status_count = 0
                else:
                    emit_status_count += 1
    finally:
        url_handler.close()

    # http://www.gavinj.net/2007/05/python-file-magic.html
    magic_type = util.MS.file(out_file)
    file_meta.update({
        'out_file': out_file,
        'magic_type': util.magic_to_mime(magic_type),
        'md5sum': md5_for_file(out_file)
    })
    if file_meta['file_size'] == -1:
        file_meta['file_size'] = os.path.getsize(file_meta['out_file'])
    return file_meta
Example #4
0
def _download_url(url, save_dir):
    """Download a URL into a file under ``save_dir`` and return its metadata.

    The metadata dictionary is built from the response headers by
    ``__file_meta_from_headers`` and then extended with ``out_file`` (path of
    the saved file), ``magic_type`` and ``md5sum``. If the headers reported a
    ``file_size`` of -1, it is replaced by the size of the saved file.

    :param url: the URL to download.
    :param save_dir: directory the file is saved into; created if needed.
    :return: the file metadata dictionary.
    :raises ValueError: if no file name could be derived for the download.
    """
    loggy = local.logger

    request = urllib2.Request(url)
    opener = urllib2.build_opener(CustomRedirectHandler())
    url_handler = opener.open(request)

    # Everything that uses the open response lives in this try block so the
    # connection is released even when the download fails half way.
    try:
        file_meta = __file_meta_from_headers(
            url,
            url_handler.headers,
            getattr(url_handler, 'redirect_headers', None)
        )
        file_name = file_meta['file_name']

        # do not remove directory here, download may be in parallel with
        # other tasks using the same URL
        __mkdir_p(save_dir)
        if file_name and file_name.endswith('/'):
            file_name = file_name[:-1]
        if not file_name:
            # Fail with a clear message instead of an IndexError or of
            # trying to open the directory itself for writing.
            raise ValueError(
                "Cannot derive a file name for URL %r" % (url,)
            )

        # NOTE(review): file_name is derived from the response headers/URL;
        # if __file_meta_from_headers does not already strip path segments
        # such as '..', it should be sanitised before use -- confirm.
        out_file = save_dir + '/' + file_name

        total_size = file_meta['file_size']
        # A non-positive size (0, or the -1 handled at the end of this
        # function) means the total is unknown: no percentage is reported.
        size_known = total_size > 0
        loggy.info(
            "Downloading: %s KBytes: %s",
            file_name,
            total_size // 1024 if size_known else 0
        )

        file_size_dl = 0
        block_sz = 8192
        emit_status_count = 0
        with open(out_file, 'wb') as output:
            while True:
                data_buffer = url_handler.read(block_sz)
                if not data_buffer:
                    break
                file_size_dl += len(data_buffer)
                output.write(data_buffer)
                # Only report progress once every 101 blocks to keep the
                # log readable.
                if emit_status_count >= 100:
                    if size_known:
                        loggy.info("%10d [%3.2f%%]", file_size_dl,
                                   file_size_dl * 100. / total_size)
                    else:
                        loggy.info("%10d", file_size_dl)
                    emit_status_count = 0
                else:
                    emit_status_count += 1
    finally:
        url_handler.close()

    # http://www.gavinj.net/2007/05/python-file-magic.html
    magic_type = util.MS.file(out_file)
    file_meta.update({
        'out_file': out_file,
        'magic_type': util.magic_to_mime(magic_type),
        'md5sum': md5_for_file(out_file)
    })
    if file_meta['file_size'] == -1:
        file_meta['file_size'] = os.path.getsize(file_meta['out_file'])
    return file_meta
Example #5
0
 def test_magic_to_mime(self):
     """A CRLF-terminated ASCII magic description maps to text/plain."""
     magic_description = 'ascii text, with crlf line terminators'
     detected_mime = util.magic_to_mime(magic_description)
     self.assertEqual("text/plain", detected_mime)