def testDownload(self):
    def on_resource(request, **kwargs):
        global source_file
        print 'on_resource'
        print source_file
        return source_file

    def on_stream(request, **kwargs):
        global source_file
        f = open(source_file, 'rb')
        return f

    def on_signature(**kwargs):
        print 'on_signature'
        return True

    def asyncDownloadHandler(response):
        print 'on_download(asyncDownloadHandler)'
        print response
        assert(not response.error)
        data = response.body
        l = len(data or '')
        print '-' * 60
        print 'downloaded %d' % l
        #print data
        print '-' * 60
        global downloaded_size, target_file, file_size
        f = open(target_file, 'ab')
        f.write(data)
        f.close()
        downloaded_size += l
        if downloaded_size >= file_size:
            ioloop.IOLoop.instance().stop()

    global flag, downloaded_size
    flag = False
    downloaded_size = 0
    self.svr1.callbacks['resource'] = on_resource
    self.svr1.callbacks['signature'] = on_signature

    cli = httpclient.AsyncHTTPClient()
    cli.fetch("http://%s:%d/obj1" % (self.ip, self.svr1.port), asyncDownloadHandler)
    ioloop.IOLoop.instance().start()
    time.sleep(1)
    ioloop.IOLoop.instance().stop()

    # the download is correct if source and target have the same md5
    s1 = util.md5_file(source_file)
    print 'md5 of source = %s' % s1
    s2 = util.md5_file(target_file)
    print 'md5 of target = %s' % s2
    flag = s1 == s2
    print 'testDownload done', flag
    assert(flag)
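# All of the snippets in this section rely on an md5_file / util.md5_file helper
# that is not shown here. A minimal sketch of such a helper, assuming it simply
# returns the hex MD5 digest of a file read in chunks (the real implementation
# may differ):
import hashlib

def md5_file(path, chunk_size=8192):
    """Hypothetical helper: return the hex MD5 digest of the file at `path`."""
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()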
def __init__ (self, doc, source, target, transcript):
    """
    Initialize the index, by specifying the source file (generated by LaTeX),
    the target file (the output of makeindex) and the transcript (e.g. .ilg)
    file. Transcript is used by glosstex.py.
    """
    self.paranoid = True
    self.doc = doc
    self.pbase = doc.src_base
    self.source = doc.src_base + "." + source
    self.target = doc.src_base + "." + target
    self.transcript = doc.src_base + "." + transcript

    # In paranoid mode, can output only in current working dir
    if self.paranoid and (os.path.dirname(self.target) == os.getcwd()):
        self.target = os.path.basename(self.target)
        self.transcript = os.path.basename(self.transcript)

    if os.path.exists(self.source):
        self.md5 = md5_file(self.source)
    else:
        self.md5 = None

    self.tool = "makeindex"
    self.lang = None      # only for xindy
    self.modules = []     # only for xindy
    self.opts = []
    self.path = []
    self.style = None     # only for makeindex
def download_site_data(site_info):
    """
    Download the site data to a temporary directory.
    :param site_info: a sub-dictionary of site information from retrieve_sites_info_waverider_kml function
    :return: the temporary directory path and the updated site_info dictionary
    """
    temp_dir = tempfile.mkdtemp()  # location of the downloaded data

    # download data file
    logger.info('downloading data for {site_code} to {temp_dir}'.format(site_code=site_info['site_code'],
                                                                        temp_dir=temp_dir))
    try:
        r = requests.get(site_info['data_zip_url'])
    except:
        logger.error('{url} not reachable. Retry'.format(url=site_info['data_zip_url']))
        raise requests.ConnectionError

    zip_file_path = os.path.join(temp_dir, os.path.basename(site_info['data_zip_url']))
    with open(zip_file_path, 'wb') as f:
        f.write(r.content)

    """
    If a site has already been successfully processed, and the data hasn't changed, the zip file will have
    the same md5 value as the one stored in the pickle file. We then store this in
    site_info['already_uptodate'] as a boolean to be checked by the __main__ function running this script.
    In the case where the data file is the same, we don't bother unzipping it.
    """
    md5_zip_file = md5_file(zip_file_path)
    site_info['zip_md5'] = md5_zip_file
    site_info['already_uptodate'] = False

    if os.path.exists(PICKLE_FILE):
        previous_download = load_pickle_db(PICKLE_FILE)
        if site_info['data_zip_url'] in previous_download.keys():
            if previous_download[site_info['data_zip_url']] == md5_zip_file:
                site_info['already_uptodate'] = True
                return temp_dir, site_info

    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()
    os.remove(zip_file_path)

    # download metadata file
    logger.info('downloading metadata for {site_code} to {temp_dir}'.format(site_code=site_info['site_code'],
                                                                            temp_dir=temp_dir))
    r = requests.get(site_info['metadata_zip_url'])
    zip_file_path = os.path.join(temp_dir, os.path.basename(site_info['metadata_zip_url']))
    with open(zip_file_path, 'wb') as f:
        f.write(r.content)

    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()
    os.remove(zip_file_path)

    return temp_dir, site_info
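# download_site_data above consults load_pickle_db(PICKLE_FILE), which is not
# shown. The call sites only assume it returns a dict mapping zip URLs to the
# md5 digest recorded on the last successful run. A minimal sketch under that
# assumption (the real helper, and the code that writes the pickle, may differ):
import os
import pickle

def load_pickle_db(pickle_file_path):
    """Hypothetical helper: return the url -> md5 dict, or an empty dict."""
    if not os.path.exists(pickle_file_path):
        return {}
    with open(pickle_file_path, 'rb') as f:
        return pickle.load(f)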
def run_needed (self):
    """
    Check if makeindex has to be run. This is the case either if the
    target file does not exist or if the source file has changed.
    """
    if os.path.getsize(self.source) == 0:
        msg.log(_("the index file %s is empty") % self.source, pkg="index")
        return 0
    new = md5_file(self.source)
    if not os.path.exists(self.target):
        self.md5 = new
        return 1
    if not self.md5:
        self.md5 = new
        msg.log(_("the index file %s is new") % self.source, pkg="index")
        return 1
    if self.md5 == new:
        msg.log(_("the index %s did not change") % self.source, pkg="index")
        return 0
    self.md5 = new
    msg.log(_("the index %s has changed") % self.source, pkg="index")
    return 1
hasher.update(CONTENT)
md5 = hasher.hexdigest()
open(os.path.join(DWN1, channels[1], CONTENT_NAME), 'w+').write(CONTENT)

file_list = util.popen_py_read('main.py', ['ls', '--confdir=%s' % (TMP2), '--channels=%s' % (channels[1]), '--name=host1'])
print file_list
assert md5 in file_list
assert CONTENT_NAME in file_list

# Subscribe channels and synchronize
print (colors.yellow(" * Synchronization"))
util.system_py('df-links.py', ['subscribe', '--confdir=%s' % (TMP2), '--channel=%s' % (channels[1]), '--name=host1'])
util.system_py('main.py', ['sync', '--confdir=%s' % (TMP2), '--name=host1', '--downloads=%s' % (DWN2)])

# compare the original file (DWN1) with the synced copy (DWN2)
f1 = os.path.join(DWN1, channels[1], CONTENT_NAME)
f2 = os.path.join(DWN2, channels[1], CONTENT_NAME)
f_md5 = util.md5_file(f1)
assert os.path.getsize(f1) == os.path.getsize(f2)
assert open(f1, 'r').read() == open(f2, 'r').read()
assert f_md5 == util.md5_file(f2)
print (colors.yellow(" * %s downloaded correctly: %s" % (CONTENT_NAME, f_md5)))

# Clean up
p_srv.terminate()
p.terminate()
def download_site_data(site_info):
    """
    Download the site data to a temporary directory.
    :param site_info: a sub-dictionary of site information from retrieve_sites_info_awac_kml function
    :return: the temporary directory path and the updated site_info dictionary
    """
    temp_dir = tempfile.mkdtemp()
    logger.info('downloading {url} data for {site_code} to {temp_dir}'.format(
        url=site_info['text_zip_url'],
        site_code=site_info['site_code'],
        temp_dir=temp_dir))
    try:
        r = requests.get(site_info['text_zip_url'])
    except:
        logger.error('{url} not reachable. Retry'.format(url=site_info['text_zip_url']))
        raise requests.ConnectionError

    zip_file_path = os.path.join(temp_dir, os.path.basename(site_info['text_zip_url']))
    with open(zip_file_path, 'wb') as f:
        f.write(r.content)

    """
    If a site has already been successfully processed, and the data hasn't changed, the zip file will have
    the same md5 value as the one stored in the pickle file. We then store this in
    site_info['already_uptodate'] as a boolean to be checked by the __main__ function running this script.
    In the case where the data file is the same, we don't bother unzipping it.
    """
    md5_zip_file = md5_file(zip_file_path)
    site_info['zip_md5'] = md5_zip_file
    site_info['already_uptodate'] = False

    if os.path.exists(PICKLE_FILE):
        previous_download = load_pickle_db(PICKLE_FILE)
        if site_info['text_zip_url'] in previous_download.keys():
            if previous_download[site_info['text_zip_url']] == md5_zip_file:
                site_info['already_uptodate'] = True
                return temp_dir, site_info

    zip_ref = zipfile.ZipFile(zip_file_path, 'r')
    zip_ref.extractall(temp_dir)
    zip_ref.close()
    os.remove(zip_file_path)

    # the site_path code should be the name of the zip file minus the extension
    site_path = os.path.join(temp_dir, os.path.basename(zip_file_path).split('.')[0])
    if not os.path.exists(site_path):
        """
        Special case: 99% of the downloaded zip files have at their root a folder named after the site code,
        but at least one zip file doesn't. We create this folder and move all the data into it so the rest
        of the code does not have to deal with special cases.
        """
        os.makedirs(site_path)
        files = os.listdir(temp_dir)
        for f in files:
            if f != os.path.basename(site_path):
                shutil.move(os.path.join(temp_dir, f), site_path)

    site_path = os.listdir(temp_dir)[0]
    if site_info['site_code'] in site_path:
        return temp_dir, site_info