Example #1
    def testDownload(self):

        def on_resource(request, **kwargs):
            global source_file
            print 'on_resource'
            print source_file
            return source_file

        def on_stream(request, **kwargs):
            global source_file
            f = open(source_file, 'rb')
            return f

        def on_signature(**kwargs):
            print 'on_signature'
            return True

        def asyncDownloadHandler(response):
            print 'on_download(asyncDownloadHandler)'
            print response
            assert(not response.error)

            data = response.body
            l = len(data or '')
            print '-' * 60
            print 'downloaded %d' % l
            #print data
            print '-' * 60

            global downloaded_size, target_file, file_size
            f = open(target_file, 'ab')
            f.write(data)
            f.close()
            downloaded_size += l
            if downloaded_size >= file_size: 
                ioloop.IOLoop.instance().stop()

        global flag, downloaded_size
        flag = False
        downloaded_size = 0
        self.svr1.callbacks['resource'] = on_resource
        self.svr1.callbacks['signature'] = on_signature

        cli = httpclient.AsyncHTTPClient()
        cli.fetch("http://%s:%d/obj1" % (self.ip, self.svr1.port), asyncDownloadHandler)
        ioloop.IOLoop.instance().start()

        time.sleep(1)

        ioloop.IOLoop.instance().stop()
        s1 = util.md5_file(source_file)
        print 'md5 of source = %s' % s1

        s2 = util.md5_file(target_file)
        print 'md5 of target = %s' % s2
        
        flag = s1 == s2
        print 'testDownload done', flag
        assert(flag)
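
The tests on this page all rely on a util.md5_file helper that is not shown here. A minimal sketch of such a helper, assuming it takes a file path and returns the hexadecimal digest, might look like this:

import hashlib

def md5_file(path, chunk_size=65536):
    # Hash the file in fixed-size chunks so large files do not have to fit in memory.
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()
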
Example #2
    def __init__ (self, doc, source, target, transcript):
        """
        Initialize the index, by specifying the source file (generated by
        LaTeX), the target file (the output of makeindex) and the transcript
        (e.g. .ilg) file.  Transcript is used by glosstex.py.
        """
        self.paranoid = True
        self.doc = doc
        self.pbase = doc.src_base
        self.source = doc.src_base + "." + source
        self.target = doc.src_base + "." + target
        self.transcript = doc.src_base + "." + transcript

        # In paranoid mode, can output only in current working dir
        if self.paranoid and (os.path.dirname(self.target) == os.getcwd()):
            self.target = os.path.basename(self.target)
            self.transcript = os.path.basename(self.transcript)

        if os.path.exists(self.source):
            self.md5 = md5_file(self.source)
        else:
            self.md5 = None

        self.tool = "makeindex"
        self.lang = None   # only for xindy
        self.modules = []  # only for xindy
        self.opts = []
        self.path = []
        self.style = None  # only for makeindex
Example #3
def download_site_data(site_info):
    """
    download to a temporary directory the data
    :param site_info: a sub-dictionary of site information from retrieve_sites_info_waverider_kml function
    :return:
    """
    temp_dir = tempfile.mkdtemp()  # location of the downloaded data

    # download data file
    logger.info('downloading data for {site_code} to {temp_dir}'.format(site_code=site_info['site_code'],
                                                                        temp_dir=temp_dir))
    try:
        r = requests.get(site_info['data_zip_url'])
    except requests.exceptions.RequestException:
        logger.error('{url} not reachable. Retry'.format(url=site_info['data_zip_url']))
        raise requests.ConnectionError

    zip_file_path = os.path.join(temp_dir, os.path.basename(site_info['data_zip_url']))

    with open(zip_file_path, 'wb') as f:
        f.write(r.content)

    """
    If a site has already been successfully processed, and the data hasn't changed, the zip file will have the same md5 
    value as the one stored in the pickle file. We then store this in site_info['already_uptodate'] as a boolean to be 
    checked by the __main__ function running this script. In the case where the data file is the same, we don't bother
    unzipping it
    """
    md5_zip_file = md5_file(zip_file_path)
    site_info['zip_md5'] = md5_zip_file
    site_info['already_uptodate'] = False
    if os.path.exists(PICKLE_FILE):
        previous_download = load_pickle_db(PICKLE_FILE)
        if site_info['data_zip_url'] in previous_download.keys():
            if previous_download[site_info['data_zip_url']] == md5_zip_file:
                site_info['already_uptodate'] = True
                return temp_dir, site_info

    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()
    os.remove(zip_file_path)

    # download metadata file
    logger.info('downloading metadata for {site_code} to {temp_dir}'.format(site_code=site_info['site_code'],
                                                                            temp_dir=temp_dir))

    r = requests.get(site_info['metadata_zip_url'])
    zip_file_path = os.path.join(temp_dir, os.path.basename(site_info['metadata_zip_url']))

    with open(zip_file_path, 'wb') as f:
        f.write(r.content)

    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()
    os.remove(zip_file_path)

    return temp_dir, site_info
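
Example #3 reads a previously saved mapping of zip URLs to MD5 digests through load_pickle_db and PICKLE_FILE, neither of which appears on this page. A minimal sketch of such helpers, assuming the pickle file simply stores a plain dict, might be:

import os
import pickle

def load_pickle_db(pickle_file_path):
    # Hypothetical helper: return the saved {zip_url: md5_digest} mapping,
    # or an empty dict when no pickle file exists yet.
    if not os.path.exists(pickle_file_path):
        return {}
    with open(pickle_file_path, 'rb') as f:
        return pickle.load(f)

def save_pickle_db(db, pickle_file_path):
    # Hypothetical counterpart: persist the mapping so the next run can
    # skip zip files whose MD5 digest has not changed.
    with open(pickle_file_path, 'wb') as f:
        pickle.dump(db, f)
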
Example #4
    def run_needed (self):
        """
        Check if makeindex has to be run. This is the case either if the
        target file does not exist or if the source file has changed.
        """
        if os.path.getsize(self.source) == 0:
            msg.log(_("the index file %s is empty") % self.source, pkg="index")
            return 0
        new = md5_file(self.source)
        if not os.path.exists(self.target):
            self.md5 = new
            return 1
        if not self.md5:
            self.md5 = new
            msg.log(_("the index file %s is new") % self.source, pkg="index")
            return 1
        if self.md5 == new:
            msg.log(_("the index %s did not change") % self.source, pkg="index")
            return 0
        self.md5 = new
        msg.log(_("the index %s has changed") % self.source, pkg="index")
        return 1
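
How run_needed is consumed is not shown in this example; a hedged sketch of the check-then-run pattern it supports, where the class name, constructor arguments, and run method are assumptions based on Example #2:

# Hypothetical caller: rebuild the index only when the source .idx file changed.
index = Index(doc, "idx", "ind", "ilg")   # an instance of the class from Example #2, assumed to be named Index
if index.run_needed():
    index.run()                           # assumed method that actually invokes makeindex/xindy
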
Example #5
hasher.update(CONTENT)
md5 = hasher.hexdigest()

open (os.path.join (DWN1, channels[1], CONTENT_NAME), 'w+').write(CONTENT)

file_list = util.popen_py_read ('main.py', ['ls', '--confdir=%s'%(TMP2), '--channels=%s'%(channels[1]), '--name=host1'])
print file_list
assert md5 in file_list
assert CONTENT_NAME in file_list

# Subscribe channels
print (colors.yellow(" * Synchronization"))

util.system_py ('df-links.py', ['subscribe', '--confdir=%s'%(TMP2), '--channel=%s'%(channels[1]), '--name=host1'])
util.system_py ('main.py', ['sync', '--confdir=%s'%(TMP2), '--name=host1', '--downloads=%s'%(DWN2)])

f1 = os.path.join (DWN1, channels[1], CONTENT_NAME)
f2 = os.path.join (DWN2, channels[1], CONTENT_NAME)
f_md5 = util.md5_file(f1)

assert os.path.getsize(f1) == os.path.getsize(f2)
assert open(f1,'r').read() == open(f2,'r').read()
assert f_md5 == util.md5_file(f2)

print (colors.yellow(" * %s downloaded correctly: %s" %(CONTENT_NAME, f_md5)))


# Clean up
p_srv.terminate()
p.terminate()
Example #6
def download_site_data(site_info):
    """
    download to a temporary directory the data
    :param site_info: a sub-dictionary of site information from retrieve_sites_info_awac_kml function
    :return:
    """
    temp_dir = tempfile.mkdtemp()
    logger.info('downloading {url} data for {site_code} to {temp_dir}'.format(
        url=site_info['text_zip_url'],
        site_code=site_info['site_code'],
        temp_dir=temp_dir))

    try:
        r = requests.get(site_info['text_zip_url'])
    except requests.exceptions.RequestException:
        logger.error(
            '{url} not reachable. Retry'.format(url=site_info['text_zip_url']))
        raise requests.ConnectionError

    zip_file_path = os.path.join(temp_dir,
                                 os.path.basename(site_info['text_zip_url']))

    with open(zip_file_path, 'wb') as f:
        f.write(r.content)
    """
    If a site has already been successfully processed, and the data hasn't changed, the zip file will have the same md5 
    value as the one stored in the pickle file. We then store this in site_info['already_uptodate'] as a boolean to be 
    checked by the __main__ function running this script. In the case where the data file is the same, we don't bother
    unzipping it
    """
    md5_zip_file = md5_file(zip_file_path)
    site_info['zip_md5'] = md5_zip_file
    site_info['already_uptodate'] = False
    if os.path.exists(PICKLE_FILE):
        previous_download = load_pickle_db(PICKLE_FILE)
        if site_info['text_zip_url'] in previous_download.keys():
            if previous_download[site_info['text_zip_url']] == md5_zip_file:
                site_info['already_uptodate'] = True
                return temp_dir, site_info

    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()
    os.remove(zip_file_path)

    # the site code should be the name of the zip file minus the extension
    site_path = os.path.join(temp_dir,
                             os.path.basename(zip_file_path).split('.')[0])

    if not os.path.exists(site_path):
        """
        Special case:
        99% of the download zip files have at their root a folder named after the site code. But at least one zip file
        doesn't. We're creating this folder and move all the data to this folder so the rest of the codes does not have 
        to deal with special cases.
        """
        os.makedirs(site_path)
        files = os.listdir(temp_dir)

        for f in files:
            if f != os.path.basename(site_path):
                shutil.move(os.path.join(temp_dir, f), site_path)

    site_path = os.listdir(temp_dir)[0]
    if site_info['site_code'] in site_path:
        return temp_dir, site_info
Example #7
assert md5 in file_list
assert CONTENT_NAME in file_list

# Subscribe channels
print(colors.yellow(" * Synchronization"))

util.system_py('df-links.py', [
    'subscribe',
    '--confdir=%s' % (TMP2),
    '--channel=%s' % (channels[1]), '--name=host1'
])
util.system_py('main.py', [
    'sync',
    '--confdir=%s' % (TMP2), '--name=host1',
    '--downloads=%s' % (DWN2)
])

f1 = os.path.join(DWN1, channels[1], CONTENT_NAME)
f2 = os.path.join(DWN2, channels[1], CONTENT_NAME)
f_md5 = util.md5_file(f1)

assert os.path.getsize(f1) == os.path.getsize(f2)
assert open(f1, 'r').read() == open(f2, 'r').read()
assert f_md5 == util.md5_file(f2)

print(colors.yellow(" * %s downloaded correctly: %s" % (CONTENT_NAME, f_md5)))

# Clean up
p_srv.terminate()
p.terminate()
Example #8
def download_site_data(site_info):
    """
    download to a temporary directory the data
    :param site_info: a sub-dictionary of site information from retrieve_sites_info_awac_kml function
    :return:
    """
    temp_dir = tempfile.mkdtemp()
    logger.info('downloading {url} data for {site_code} to {temp_dir}'.format(url=site_info['text_zip_url'],
                                                                              site_code=site_info['site_code'],
                                                                              temp_dir=temp_dir))

    try:
        r = requests.get(site_info['text_zip_url'])
    except requests.exceptions.RequestException:
        logger.error('{url} not reachable. Retry'.format(url=site_info['text_zip_url']))
        raise requests.ConnectionError

    zip_file_path = os.path.join(temp_dir, os.path.basename(site_info['text_zip_url']))

    with open(zip_file_path, 'wb') as f:
        f.write(r.content)

    """
    If a site has already been successfully processed, and the data hasn't changed, the zip file will have the same md5 
    value as the one stored in the pickle file. We then store this in site_info['already_uptodate'] as a boolean to be 
    checked by the __main__ function running this script. In the case where the data file is the same, we don't bother
    unzipping it
    """
    md5_zip_file = md5_file(zip_file_path)
    site_info['zip_md5'] = md5_zip_file
    site_info['already_uptodate'] = False
    if os.path.exists(PICKLE_FILE):
        previous_download = load_pickle_db(PICKLE_FILE)
        if site_info['text_zip_url'] in previous_download.keys():
            if previous_download[site_info['text_zip_url']] == md5_zip_file:
                site_info['already_uptodate'] = True
                return temp_dir, site_info

    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()
    os.remove(zip_file_path)

    # the site code should be the name of the zip file minus the extension
    site_path = os.path.join(temp_dir, os.path.basename(zip_file_path).split('.')[0])

    if not os.path.exists(site_path):
        """
        Special case:
        99% of the download zip files have at their root a folder named after the site code. But at least one zip file
        doesn't. We're creating this folder and move all the data to this folder so the rest of the codes does not have 
        to deal with special cases.
        """
        os.makedirs(site_path)
        files = os.listdir(temp_dir)

        for f in files:
            if f != os.path.basename(site_path):
                shutil.move(os.path.join(temp_dir, f), site_path)

    site_path = os.listdir(temp_dir)[0]
    if site_info['site_code'] in site_path:
        return temp_dir, site_info
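
The comment block inside download_site_data notes that already_uptodate is checked by the script's __main__ block, which is not reproduced here. A minimal hedged sketch of such a check, where retrieve_sites_info_awac_kml is known only by name from the docstring and process_site is a hypothetical stand-in for the actual downstream processing:

if __name__ == '__main__':
    for site_info in retrieve_sites_info_awac_kml():   # assumed to yield site_info dicts
        temp_dir, site_info = download_site_data(site_info)
        if site_info['already_uptodate']:
            # zip MD5 matches the pickled value, nothing to reprocess
            logger.info('{site_code} unchanged, skipping'.format(site_code=site_info['site_code']))
            continue
        process_site(temp_dir, site_info)               # hypothetical processing step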