def list(self, select=None, nlast=None):
    con = self.connect()
    log("listing cache")

    selection_string = ""
    if select is not None:
        selection_string = " WHERE " + select  # must be string

    nlast_string = ""
    if nlast is not None:
        nlast_string = " ORDER BY rowid DESC LIMIT %i" % nlast  # must be int

    with con:
        cur = con.cursor()
        log("SELECT * FROM cacheindex" + selection_string + nlast_string)
        self.retry_execute(cur, "SELECT * FROM cacheindex" + selection_string + nlast_string)

        rows = cur.fetchall()

    log("found rows", len(rows))

    for h, fh, c in rows:
        try:
            c = cPickle.loads(str(c))
            log(str(h), str(c))
        except Exception as e:
            log("exception while loading:", e)
            raise

    return len(rows)

def make_delegation_record(self, hashe, module_description, dependencies):
    import json

    log("will store", hashe, module_description)

    # con = lite.connect(self.filecacheroot+'/index.db')
    db = self.connect()

    shorthashe = self.hashe2signature(hashe)

    if dependencies is not None and dependencies != []:
        status = "waiting for:" + ",".join(dependencies)  # commas?
    else:
        status = "ready to run"

    with db:
        cur = db.cursor()
        self.retry_execute(
            cur,
            "CREATE TABLE IF NOT EXISTS delegationindex(id MEDIUMINT NOT NULL AUTO_INCREMENT, timestamp DOUBLE, hashe TEXT, fullhashe TEXT, modules TEXT, status TEXT, PRIMARY KEY (id))")
        self.retry_execute(
            cur,
            "INSERT INTO delegationindex (timestamp,hashe,fullhashe,modules,status) VALUES(%s,%s,%s,%s,%s)",
            (time.time(), shorthashe, json.dumps(hashe), json.dumps(module_description), status))

        log("now rows", cur.rowcount)

    return shorthashe

def restore(self, hashe, obj, restore_config=None):
    # return # problem with files

    if obj.run_for_hashe or obj.mutating:
        return

    # check if updated
    self.list()

    c = self.find(hashe)

    if c is None:
        if not obj.cached:
            log("object is not cached, i.e. only transient level cache; not leading to parent")
            return
        return self.restore_from_parent(hashe, obj, restore_config)

    if hasattr(c, '_da_recovered_restore_config') and c._da_recovered_restore_config != restore_config:
        log("object in Transient cache was recovered with a different restore config: need to restore from parent")
        return self.restore_from_parent(hashe, obj, restore_config)

    log("transient cache stores results in the memory, found:", c)
    obj.import_data(c)

    log("also files restores are ignored")
    log("restored")
    return True

def adopt_datafiles(self, content):
    from dataanalysis.core import DataFile, map_nested_structure  # very delayed import

    extra_content = {}
    remove_keys = []

    def mapping_adoption(k, b):
        a = re.sub("[^a-zA-Z0-9\-]", "_", "_".join(map(str, k)))
        adopted_b = DataFile.from_object(k, b, optional=True)

        if adopted_b is not b:
            log("storing adopted DataFile", a, adopted_b, level="main")
            extra_content["_datafile_" + a] = adopted_b
            return None  # datafile is put elsewhere

        return adopted_b

    content = map_nested_structure(content, mapping_adoption)

    if len(extra_content) > 0:
        log("extra content:", extra_content)
        content = dict(content.items() + extra_content.items())

    log("after adoption, keys", content.keys())

    return content

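# Hedged sketch (not from the original source, assuming map_nested_structure passes the
# key path of each entry as a tuple): values adopted by DataFile.from_object() are
# replaced by None in place and re-attached at the top level under a sanitized
# "_datafile_<key path>" name. The keys and objects below are hypothetical.
#
#   {"spectrum": {"fits": <adoptable object>}, "scalar": 1.0}
#   # after adopt_datafiles(), roughly:
#   {"spectrum": {"fits": None}, "scalar": 1.0, "_datafile_spectrum_fits": <DataFile>}
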
def report_analysis_state(self, obj, state):
    state_dir_root = self.filecacheroot + "/state_reports/"
    state_dir = state_dir_root + "/" + obj.get_signature()

    try:
        os.makedirs(state_dir)
    except os.error:
        log("unable to create state dir!")  # exist?

    state_ticket_fn = repr(obj) + "_" + time.strftime("%Y%m%d_%H%M%S") + ".txt"
    state_ticket_fn = state_ticket_fn.replace("]", "")
    state_ticket_fn = state_ticket_fn.replace("[", "")
    state_ticket_fn = state_ticket_fn.replace(":", "_")

    f = open(state_dir + "/" + state_ticket_fn, "w")
    f.write("-" * 80 + "\n")
    f.write(repr(state) + "\n\n")
    f.write(socket.gethostname() + "\n\n")
    f.write(time.strftime("%Y-%m-%dT%H:%M:%S %a %d %B") + "\n\n")

    if hasattr(obj, '_da_requested_by'):
        f.write("requested by: " + " ".join(obj._da_requested_by) + "\n\n")

    if hasattr(obj, '_da_expected_full_hashe'):
        f.write("expected as: " + repr(obj._da_expected_full_hashe) + "\n\n")

    try:
        f.write("factory knows: " + repr(analysisfactory.AnalysisFactory.cache) + "\n\n")
    except Exception as e:
        log(e)

def store_to_parent(self, hashe, obj):
    if self.parent is None:
        log("no parent available to call for")
        return None

    log(self, "there is a parent available to call for:", self.parent)
    return self.parent.store(hashe, obj)

def store_to_parent(self, hashe, obj):
    if self.parent is None:
        log("no parent available to call for")
        return None

    log("there is a parent available to call for:", self.parent)
    return self.parent.store(hashe, obj)

def load(self, target=None):
    if target is None:
        target = self.filecacheroot + "/index.pickle.gz"

    if os.path.exists(target):
        self.cache = cPickle.load(gzip.open(target))
    else:
        log("file to load does not exist:", target)

def hashe2signature(self, hashe_raw):
    hashe = hashtools.hashe_replace_object(hashe_raw, None, "None")

    log("hashe:", hashe)
    if isinstance(hashe, tuple):
        if hashe[0] == "analysis":
            return hashe[2] + ":" + hashtools.shhash(hashe)[:8]

    sig = hashtools.shhash(hashe)[:8]
    log("signature hashe:", sig)
    return sig

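# Hedged usage sketch (the cache instance and hashe tuple below are hypothetical):
# for an "analysis" hashe the signature is "<analysis name>:<first 8 hex chars of shhash>",
# otherwise just the 8-character prefix of the full digest.
#
#   cache.hashe2signature(("analysis", ("list", "input_a", "input_b"), "MyAnalysis"))
#   # -> something like "MyAnalysis:1a2b3c4d"  (the hex part depends on the hash)
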
def inject_serialization(self, serialization):
    log("injecting", serialization)

    name, data = serialization

    obj = self[name]
    obj.import_data(data)
    obj.infactory = True
    obj.virtual = True

    self.put(obj)

    log("result of injection", self.byname(obj.get_signature()))

def retry_execute(self, cur, *a, **aa):
    timeout = aa['timeout'] if 'timeout' in aa else 10

    e = Exception("undefined exception during retry")
    for x in range(timeout):
        try:
            return cur.execute(*a)
        except Exception as e:
            log(render("{RED}sqlite execute failed, try again{/}: " + repr(e)), x)
            time.sleep(1)

    raise e

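# Hedged usage sketch: retry_execute simply re-tries cur.execute() up to `timeout`
# times, sleeping one second between attempts, and re-raises the last exception.
# The cache and connection names below are hypothetical; the pattern mirrors list().
#
#   with con:
#       cur = con.cursor()
#       cache.retry_execute(cur, "SELECT * FROM cacheindex")
#       rows = cur.fetchall()
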
def set_callback_accepted_classes(cls, classes):
    if cls.callback_accepted_classes is None:
        cls.callback_accepted_classes = []

    for c in classes:
        if c not in cls.callback_accepted_classes:
            log("adding accepted class", c, level="callback")
            cls.callback_accepted_classes.append(c)

    log("callback currently accepts classes", cls.callback_accepted_classes)

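# Hedged usage sketch (class names below are hypothetical): restrict which analysis
# classes trigger callbacks; note that filter_callback() always lets "failed" states
# through regardless of this list.
#
#   Callback.set_callback_accepted_classes([MyAnalysisBase, MyOtherAnalysis])
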
def mapping_adoption(k, b):
    a = re.sub("[^a-zA-Z0-9\-]", "_", "_".join(map(str, k)))
    adopted_b = DataFile.from_object(k, b, optional=True)

    if adopted_b is not b:
        log("storing adopted DataFile", a, adopted_b, level="main")
        extra_content["_datafile_" + a] = adopted_b
        return None  # datafile is put elsewhere

    return adopted_b

def datafile_restore_mapper(k, b):
    log("processing structure entry", k, b)

    if is_datafile(b):
        if len(k) == 1:
            a = k[0]
        else:
            a = k

        self.restore_datafile(a, b, cached_path, restore_config, obj, hashe, add_keys, remove_keys)

    return b

def find(self, hashe):
    self.load()

    if hashe in self.cache:
        fi = self.cache[hashe]
        log("{log:cache}", "cache found!", fi)
        return fi

    log("found no cache for", hashe)
    return None

def remove_repeating_stacks(input_stack):
    exclude_mask = [False] * len(input_stack)

    for stack_length in range(1, len(input_stack) / 2):
        for stack_start in range(0, len(input_stack) - stack_length):
            if input_stack[stack_start:stack_start + stack_length] == \
                    input_stack[stack_start + stack_length:stack_start + stack_length + stack_length]:
                log("found repetition of ", stack_start, stack_length, ":",
                    input_stack[stack_start:stack_start + stack_length * 2], level="top")
                for i in range(stack_start + stack_length, stack_start + stack_length + stack_length):
                    exclude_mask[i] = True

    if sum(exclude_mask) > 0:
        log("excluding", sum(exclude_mask), "out of", len(exclude_mask), level="top")

    return [inp for inp, m in zip(input_stack, exclude_mask) if not m]

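# Hedged illustration (not part of the original module): the function drops the second
# copy of any immediately repeated contiguous block. The input below is made up.
#
#   remove_repeating_stacks(["a", "b", "a", "b", "c", "d"])
#   # -> ["a", "b", "c", "d"]   (the second "a", "b" pair is masked out)
#
# With the Python 2 integer division in range(1, len(input_stack)/2), repetitions are
# only searched for block lengths up to len(input_stack)/2 - 1.
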
def find(self, hashe):
    self.load()

    if hashe in self.cache:
        fi = self.cache[hashe]
        log("{log:cache}", "cache found!", fi)
        return fi

    log("found no cache for", hashe, "in", self)
    return None

def shhash(x):
    try:
        x = hashe_replace_object(x, None, 'None')
    except Exception as e:
        log("error while hashe_replace_object", e, level="hashe")

    try:
        x = hashe_map(x, str)
    except Exception as e:
        log("error while hash_map", e, level="hashe")

    return sha224(str(hash(x)).encode('utf-8')).hexdigest()

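# Hedged sketch (the tuple below is hypothetical): shhash() normalises the hashe
# (replacing None and mapping entries to strings via hashe_map), hashes the result
# with the builtin hash(), and returns the sha224 hex digest of that value.
#
#   sig = shhash(("analysis", ("list", "input_a", "input_b"), "MyAnalysis"))
#   # sig is a 56-character hex string; callers such as hashe2signature()
#   # typically keep only the first 8 characters.
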
def retry_execute(self, cur, *a, **aa):
    timeout = aa['timeout'] if 'timeout' in aa else 10

    e = Exception("while retry_execute")
    for x in range(timeout):
        try:
            log(a)
            self.total_attempts += 1
            return cur.execute(*a)
        except Exception as e:
            self.failed_attempts += 1
            log(render("{RED}mysql execute failed, try again{/}: " + repr(e)), x)
            time.sleep(1)

    raise e

def report_exception(self, obj, e):
    exception_dir_root = self.filecacheroot + "/exception_reports/"
    exception_dir = exception_dir_root + "/" + obj.get_signature()

    try:
        os.makedirs(exception_dir)
    except os.error:
        log("unable to create exception dir!")  # exissst?

    exception_ticket_fn = repr(obj) + "_" + time.strftime("%Y%m%d_%H%M%S") + ".txt"
    exception_ticket_fn = exception_ticket_fn.replace("]", "")
    exception_ticket_fn = exception_ticket_fn.replace("[", "")
    exception_ticket_fn = exception_ticket_fn.replace(":", "_")

    try:
        f = open(exception_dir + "/" + exception_ticket_fn, "w")
        f.write("-" * 80 + "\n")
        f.write(repr(e) + "\n\n")
        f.write(socket.gethostname() + "\n\n")
        f.write(time.strftime("%Y-%m-%dT%H:%M:%S %a %d %B") + "\n\n")

        if hasattr(obj, '_da_requested_by'):
            f.write("requested by: " + " ".join(obj._da_requested_by) + "\n\n")

        try:
            f.write("factory knows: " + repr(analysisfactory.AnalysisFactory.cache) + "\n\n")
        except Exception as e:
            log(e)
    except Exception:
        log("unable to write exception!")
        log(e)

def open(self, fn, mode="r", gz=False):
    local_fn = os.path.basename(fn)  # !!

    if "w" == mode:
        log("will later put file to irods")
        self.register_pending_put(local_fn, fn)
    elif "r" == mode:
        log("will get file from irods:", fn, local_fn)
        self.get(fn, local_fn)
    else:
        raise Exception("do not understand this mode: " + mode)

    if gz:
        return gzip.open(local_fn, mode)

    return open(local_fn, mode)

def connect(self):
    if not hasattr(self, 'mysql_enabled'):
        raise Exception("mysql disabled")
    else:
        import MySQLdb

    if self.db is None:
        log("connecting to mysql")
        self.db = MySQLdb.connect(host="apcclwn12",  # your host, usually localhost
                                  user="******",  # your username
                                  port=42512,
                                  # unix_socket="/workdir/savchenk/mysql/var/mysql.socket",
                                  passwd=open(os.environ['HOME'] + "/.secret_mysql_password").read().strip(),  # your password
                                  db="ddacache")  # name of the data base

    return self.db

def import_ddmodules(module_names=None):
    if module_names is None:
        module_names = dd_module_names

    modules = []
    for dd_module_name in module_names:
        if isinstance(dd_module_name, str) and dd_module_name.startswith("dataanalysis."):
            continue

        log("importing", dd_module_name)
        dd_module = importing.load_by_name(dd_module_name)
        reload(dd_module[0])
        modules.append(dd_module[0])

    return modules

def get_module_description(self):
    module_description = []

    for m in AnalysisFactory.dda_modules_used:
        log("module", m)

        if hasattr(m, "__dda_module_global_name__"):
            log("dda module global name", m.__dda_module_global_name__)

            module_origin = 'cache'
            if hasattr(m, "__dda_module_origin__"):
                module_origin = m.__dda_module_origin__

            module_description.append([module_origin, m.__name__, m.__dda_module_global_name__])
        else:
            if hasattr(m, '__file__'):
                module_description.append(['filesystem', m.__name__, m.__file__])
            else:
                module_description.append(['filesystem', m.__name__, None])

    return module_description

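# Hedged sketch: each entry of the returned list has the shape
# [origin, module name, global name or file path]; the concrete values below are
# hypothetical, only the structure is taken from the code above.
#
#   [['cache', 'mymodule', 'mymodule_global_name'],
#    ['filesystem', 'othermodule', '/path/to/othermodule.py']]
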
def store(self, hashe, obj):
    # return # problem with files

    log("storing in memory cache:", hashe)

    if obj.run_for_hashe or obj.mutating:
        return

    if self.readonly_cache:
        return self.store_to_parent(hashe, obj)

    obj._da_stamp = obj.get_stamp()  # or in the object?

    if not hasattr(self, 'cache'):
        self.cache = {}

    content = obj.export_data()
    self.cache[hashe] = content

    log("stored in transient", obj, hashe)

def construct_cached_file_path(self, hashe, obj):
    log("requested path for", hashe, obj)

    def hash_to_path(hashe):
        if isinstance(hashe, tuple):
            if hashe[0] == "analysis":  # more universally
                return hash_to_path(hashe[2]) + "/" + hash_to_path(hashe[1])

            if hashe[0] == "list":  # more universally
                return "..".join(map(hash_to_path, hashe[1:]))

            raise Exception("unknown tuple in the hash:" + str(hashe))

        if isinstance(hashe, str):
            return hashe

        raise Exception("unknown class in the hash:" + str(hashe))

    def hash_to_path2(hashe):
        # by32=lambda x:x[:8]+"/"+by8(x[8:]) if x[8:]!="" else x
        return hashe[2] + "/" + hashtools.shhash(repr(hashe[1]))

    return self.filecacheroot + "/" + hashe[1][1] + "/" + hashe[1][2] + "/"  # choose to avoid overlap

def import_ddmodules(module_names=None):
    if module_names is None:
        module_names = dd_module_names

    modules = []
    for dd_module_name in module_names:
        if isinstance(dd_module_name, str) and dd_module_name.startswith("dataanalysis."):
            continue

        log("importing", dd_module_name, level="top")
        dd_module = importing.load_by_name(dd_module_name)

        modules.append(dd_module[0])
        log("module", dd_module[1], "as", dd_module[0], "set to global namespace", level="top")
        globals()[dd_module[1]] = dd_module[0]
        # reload(dd_module[0])

    return modules

def construct_cached_file_path(self, hashe, obj):
    log("requested default cached file path")

    def hash_to_path(hashe):
        if isinstance(hashe, tuple):
            if hashe[0] == "analysis":  # more universally
                return hash_to_path(hashe[2]) + "/" + hash_to_path(hashe[1])

            if hashe[0] == "list":  # more universally
                return "..".join(map(hash_to_path, hashe[1:]))

            raise Exception("unknown tuple in the hash:" + str(hashe))

        if isinstance(hashe, str):
            return hashe

        raise Exception("unknown class in the hash:" + str(hashe))

    def hash_to_path2(hashe):
        # by32=lambda x:x[:8]+"/"+by8(x[8:]) if x[8:]!="" else x
        return hashe[2] + "/" + hashtools.shhash(repr(hashe[1]))

    return self.filecacheroot + "/" + hash_to_path2(hashe) + "/"  # choose to avoid overlap

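# Hedged sketch of the resulting layout (names are hypothetical): for
# hashe = ("analysis", <full input hashe>, "MyAnalysis"), hash_to_path2() yields
#
#   <filecacheroot>/MyAnalysis/<shhash(repr(<full input hashe>))>/
#
# i.e. one directory per analysis name, with one sub-directory per distinct input hashe.
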
def process_filtered(self, level, obj, message, data):
    if self.url is None:
        return

    object_data = {}
    object_data.update(data)
    object_data.update(self.extract_data(obj))
    object_data['request_root_node'] = getattr(obj, 'request_root_node', False)

    params = dict(
        level=level,
        node=obj.get_signature(),
        message=message,
    )
    params.update(object_data)
    params['action'] = data.get('state', 'progress')

    if self.url.startswith("file://"):
        fn = self.url[len("file://"):]
        with open(fn, 'a') as f:
            f.write(str(datetime.datetime.now()) + " " + level + ": " + " in " + str(obj) +
                    " got " + message + "; " + repr(object_data) + "\n")
    elif self.url.startswith("http://"):
        try:
            session = requests.Session()
            session.trust_env = False

            r = session.get(self.url, params=params)

            log("callback succeeded", self.url, params, r, level="callback")
            log_hook("callback", obj, message="callback succeeded",
                     callback_url=self.url, callback_params=self.url_params,
                     action_params=params, callback_response_content=r.content)
            return r, r.content
        except requests.ConnectionError as e:
            log("callback failed", self.url, params, ":", e, level="callback")
            log_hook("callback", obj, message="callback failed!",
                     callback_exception=repr(e), callback_url=self.url,
                     callback_params=self.url_params, action_params=params)
            return "callback failed", repr(e)
    else:
        raise Exception("unknown callback method", self.url)

def make_record(self, hashe, content):
    import json

    log("will store", hashe, content)

    # con = lite.connect(self.filecacheroot+'/index.db')
    db = self.connect()

    c = cPickle.dumps(content)
    log("content as", c)

    if "_da_cached_path" in content:
        aux1 = content['_da_cached_path']
    else:
        aux1 = ""

    with db:
        cur = db.cursor()
        # note: the INSERT below also writes timestamp and refdir, so they are included in the schema
        self.retry_execute(
            cur,
            "CREATE TABLE IF NOT EXISTS cacheindex(hashe TEXT, fullhashe TEXT, content TEXT, timestamp DOUBLE, refdir TEXT)")
        self.retry_execute(
            cur,
            "INSERT INTO cacheindex (hashe,fullhashe,content,timestamp,refdir) VALUES(%s,%s,%s,%s,%s)",
            (self.hashe2signature(hashe), json.dumps(hashe), c, time.time(), aux1))

        log("now rows", cur.rowcount)

def statistics(self):
    if self.con is None:
        log("NOT connected")
    else:
        log("connected to", self.con)

    log("operations total/failed", self.total_attempts, self.failed_attempts)

def get_url(self, **extra_parameters):
    params = dict(
        target=self.identity.factory_name,
        modules=",".join(self.identity.get_modules_loadable()),
        assumptions=json.dumps(self.identity.assumptions),
        requested_by=",".join(self.requested_by),
        expected_hashe=json.dumps(self.identity.expected_hashe),
        mode="interactive",
    )

    if extra_parameters is not None:
        params.update(extra_parameters)

    log("params", params)

    url_root = self.url_base + "/api/%(api_version)s/produce" % dict(api_version=self.api_version)
    url = url_root + "?" + urllib.urlencode(params)

    log("url:", url)

    return url

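# Hedged sketch (placeholder values): the resulting URL has the form
#
#   <url_base>/api/<api_version>/produce?target=MyAnalysis&modules=...&assumptions=...
#       &requested_by=...&expected_hashe=...&mode=interactive
#
# with assumptions and expected_hashe passed as JSON-encoded query parameters.
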
def delegate(self, hashe, obj):
    log(self, "will delegate", obj, "as", hashe)

    task_data = dict(
        object_identity=obj.get_identity().serialize(),
    )

    r = self.queue.put(
        task_data,
        submission_data=dict(
            callbacks=obj.callbacks,
            request_origin="undefined",
        ),
    )

    if r['state'] == "done":
        # todo
        obj.process_hooks("top", obj,
                          message="task dependencies done while delegating, strange",
                          state="locked?",
                          task_comment="dependencies done before task")
        # self.queue.remember(task_data) # really is a race condit: retry
        # raise Exception("delegated task already done: the task is done but cache was not stored and delegated requested: ",task_data['object_identity']['factory_name'])#
        # ,task_data['object_identity']['assumptions'])

    r['task_data'] = task_data

    return r

def find(self, hashe):
    log("requested to find", hashe)
    cached_path = self.construct_cached_file_path(hashe, None)

    if self.filebackend.exists(cached_path + "/cache.pickle.gz"):
        log("found cache file:", cached_path + "/cache.pickle.gz")
        try:
            return self.load_content(hashe, None)
        except Exception as e:
            log("failed to load content! :" + repr(e))
            return None

    log("no file found in", cached_path)
    return None

def will_delegate(self, hashe, obj=None):
    log("trying for delegation", hashe)

    if self.delegating_analysis is None:
        log("this cache has no delegations allowed")
        return False

    if any([hashe[-1] == option or re.match(option, hashe[-1]) for option in self.delegating_analysis]):
        log("delegation IS allowed")
        return True
    else:
        log("failed to find:", hashe[-1], self.delegating_analysis)
        return False

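# Hedged usage sketch: delegating_analysis is treated as a list of exact names or
# regular expressions matched against the analysis name hashe[-1]. The cache instance,
# analysis names, and hashe below are hypothetical.
#
#   cache.delegating_analysis = ["MyAnalysis", "ISGRISpectrum.*"]
#   cache.will_delegate(("analysis", some_input_hashe, "ISGRISpectrumPacked"))  # -> True
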
def emerge_from_identity(identity):
    import dataanalysis.core as da

    da.reset()
    import_ddmodules(identity.modules)

    log("assumptions:", identity.assumptions)

    A = da.AnalysisFactory.byname(identity.factory_name)

    for assumption in identity.assumptions:
        log("requested assumption:", assumption)
        a = da.AnalysisFactory.byname(assumption[0])
        a.import_data(assumption[1])
        print(a, "from", assumption)

    producable_hashe = A.get_hashe()

    if identity.expected_hashe is None:
        log("expected hashe verification skipped")
    elif jsonify(producable_hashe) != jsonify(identity.expected_hashe):
        log("producable:\n", jsonify(producable_hashe), "\n")
        log("requested:\n", jsonify(identity.expected_hashe))

        from dataanalysis import displaygraph
        displaygraph.plot_hashe(producable_hashe, "producable.png")
        displaygraph.plot_hashe(identity.expected_hashe, "expected.png")

        raise InconsitentEmergence(
            "unable to produce\n" + repr(jsonify(identity.expected_hashe)) +
            "\n while can produce" + repr(jsonify(producable_hashe)),
            jsonify(producable_hashe),
            jsonify(identity.expected_hashe),
        )

    return A

def __init__(self, url):
    self.url = url

    try:
        self.url_params = urlparse.parse_qs(urlparse.urlparse(self.url).query)
    except Exception as e:
        log("failed extracting callback parameters:", e, level='callback-debug')
        self.url_params = {}

    log('created callback', self.url, level='callback-debug')
    log('extracted callback params', self.url_params, 'from', self.url, level='callback-debug')

def datafile_mapper(k, b):
    if is_datafile(b):
        log("requested to store DataFile", b)

        try:
            p = cached_path + os.path.basename(b.path)
        except Exception as e:
            log("failed:", e)
            log("path:", b.path)
            log("b:", b)
            raise

        b.cached_path = p + ".gz" if not p.endswith(".gz") else p
        b.store_stats = self.store_file(b.path, p)
        b._da_cached_path = cached_path
        b.cached_path_valid_url = True

        obj.note_resource_stats({
            'resource_type': 'cache',
            'resource_source': repr(self),
            'filename': b.path,
            'stats': b.store_stats,
            'operation': 'store'})

    return b

def filter_callback(self, level, obj, message, data):
    if data.get('state', 'unknown') in ["failed"]:
        return True

    if self.callback_accepted_classes is None:
        log("callback accepted:", message, level="callback")
        return True

    for accepted_class in self.callback_accepted_classes:
        try:
            if issubclass(obj.__class__, accepted_class):
                return True
        except Exception as e:
            log("unable to filter", obj, obj.__class__, accepted_class)
            raise

    log("callback NOT accepted:", message, repr(obj), level="callback-debug")
    log("accepted callbacks:", self.callback_accepted_classes, level="callback-debug")
    return False

def restore_from_parent(self, hashe, obj, rc=None):
    if self.parent is None:
        log("no parent available to call for")
        return None

    log("there is a parent available to call for:", self.parent)

    from_parent = self.parent.restore(hashe, obj, rc)

    if from_parent is not None:
        log("storing what restored from parent")
        self.store(hashe, obj)

    return from_parent

def make_record(self, hashe, content):
    log("will store", hashe, content)

    # con = lite.connect(self.filecacheroot+'/index.db')
    con = self.connect()

    c = cPickle.dumps(content)
    log("content as", c)

    with con:
        cur = con.cursor()
        self.retry_execute(
            cur,
            "CREATE TABLE IF NOT EXISTS cacheindex(hashe TEXT, content TEXT)")
        self.retry_execute(
            cur,
            "INSERT INTO cacheindex VALUES(?,?)",
            (self.hashe2signature(hashe), c))

        log("now rows", cur.rowcount)