def test___init__sets_all(self):
    """Result.__init__ rejects extra args and populates every attribute."""
    expected_params = 16  # how to get dynamically?
    # One positional argument too many must raise TypeError.
    with self.assertRaises(TypeError):
        results.Result(*range(expected_params + 1))
    # A full argument set (positionals plus the 'src' keyword) must leave
    # every attribute set to a non-None value.
    result = results.Result(*range(expected_params - 1), src={})
    populated = [value for value in result.__dict__.values() if value is not None]
    self.assertEqual(expected_params, len(populated))
def dorequest(self, timeout=TIMEOUT, HttpMethod="POST", parsexsams=True):
    """
    Sends the request to the database node and returns a result.Result instance.

    The request uses 'POST' requests by default. If the request fails with
    HTTP 400, or if stated in the parameter 'HttpMethod', 'GET' requests
    will be performed. The returned result will be parsed by default and
    the model defined in 'specmodel' will be populated (parsexsams = True).

    :param timeout: socket timeout in seconds for the HTTP connection
    :param HttpMethod: HTTP verb to use; 'POST' (default) or 'GET'
    :param parsexsams: if True, parse the XSAMS response and populate the model
    :return: results.Result instance, or None on an unrecoverable HTTP status
    :raises TimeOutError: when the socket times out (status/reason set to 408)
    """
    self.xml = None
    #self.get_xml(self.Source.Requesturl)
    url = self.baseurl + self.querypath
    urlobj = urlsplit(url)

    # Choose the connection class matching the node's URL scheme.
    if urlobj.scheme == 'https':
        conn = HTTPSConnection(urlobj.netloc, timeout=timeout)
    else:
        conn = HTTPConnection(urlobj.netloc, timeout=timeout)
    # NOTE(review): the connection is never closed explicitly; it is only
    # released on garbage collection. Consider conn.close() after reading.
    conn.putrequest(HttpMethod, urlobj.path + "?" + urlobj.query)
    conn.endheaders()

    try:
        res = conn.getresponse()
    except socket.timeout:
        # error handling has to be included
        self.status = 408
        self.reason = "Socket timeout"
        raise TimeOutError
    # Remember the outcome of the last request on the instance.
    self.status = res.status
    self.reason = res.reason

    if not parsexsams:
        if res.status == 200:
            # Raw mode: hand back the body without XSAMS parsing.
            result = r.Result()
            result.Content = res.read()
        elif res.status == 400 and HttpMethod == 'POST':
            # Try to use http-method: GET (one recursive retry; the GET
            # branch cannot recurse again because HttpMethod != 'POST').
            result = self.dorequest(HttpMethod='GET', parsexsams=parsexsams)
        else:
            result = None
    else:
        if res.status == 200:
            # Parse mode: keep the raw XML on self and populate the model.
            self.xml = res.read()
            result = r.Result()
            result.Xml = self.xml
            result.populate_model()
        elif res.status == 400 and HttpMethod == 'POST':
            # Try to use http-method: GET (same single-retry pattern).
            result = self.dorequest(HttpMethod='GET', parsexsams=parsexsams)
        else:
            result = None
    return result
def get_species(self):
    """
    Queries all species from the database-node via TAP-XSAMS request and
    the query 'SELECT SPECIES'. The retrieved species are stored in the
    instance attributes ``Molecules`` and ``Atoms``.

    Note: This does not work for all species !
    """
    # Some nodes do not understand this query at all,
    # others do not understand the query SELECT SPECIES;
    # therefore the following query is a small workaround for some nodes.
    query = q.Query("SELECT SPECIES WHERE ((InchiKey!='UGFAIRIUMAVXCW'))")
    query.set_node(self)

    result = r.Result()
    result.set_query(query)
    result.do_query()
    result.populate_model()

    # Not every node returns both blocks; a missing key simply leaves the
    # corresponding attribute untouched (deliberate best-effort behavior).
    # The former bare 'except:' also hid unrelated errors (e.g. KeyboardInterrupt);
    # narrowed to the failures a missing/odd result.data can actually produce.
    try:
        self.Molecules = result.data['Molecules']
    except (KeyError, TypeError):
        pass
    try:
        self.Atoms = result.data['Atoms']
    except (KeyError, TypeError):
        pass
def get_potential_words(self, word, k):
    '''Return a list of potential words matching input as a ResultList.

    :param word: input to find matches for
    :param k: maximum error numbers to search against (recommend 2 max)
    '''
    matches = results.ResultList()
    # Terminate the query word the same way stored words are terminated.
    terminated = word + self.chr0
    # Launch one independent fuzzy search per child of the root node,
    # each starting from a fresh, empty Result.
    for child in self.rootNode.child_nodes.itervalues():
        self._fuzzy_search_r(child, terminated, 0, results.Result(), matches, k)
    return matches
def check_for_updates(self, node):
    """
    Reports, for every species in the local Partitionfunctions table,
    whether the given vamdc node has data newer than the local timestamp.

    :param node: vamdc node to query (nodes.Node instance)
    """
    count_updates = 0   # species with a newer remote change date
    counter = 0         # progress counter over the local species list
    #species_list = []
    cursor = self.conn.cursor()
    cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp) FROM Partitionfunctions ")
    rows = cursor.fetchall()
    num_rows = len(rows)
    query = q.Query()
    result = results.Result()
    for row in rows:
        counter += 1
        # Trailing comma keeps the status message on the same line (py2 print).
        print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]),
        #id = row[1]
        vamdcspeciesid = row[2]
        # query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid
        # PF_SpeciesID carries a 6-char node prefix; strip it for the query.
        query_string = "SELECT ALL WHERE SpeciesID=%s" % row[1][6:]
        query.set_query(query_string)
        query.set_node(node)
        result.set_query(query)
        # Any failure while asking the node is treated as "unknown" rather
        # than aborting the whole check (deliberate best-effort).
        try:
            changedate = result.getChangeDate()
        except:
            changedate = None
        # Local timestamps are stored without zone info; assume GMT.
        tstamp = parser.parse(row[3] + " GMT")
        if changedate is None:
            print " -- UNKNOWN (Could not retrieve information)"
            continue

        if tstamp < changedate:
            print " -- UPDATE AVAILABLE "
            count_updates += 1
        else:
            print " -- up to date"

    if count_updates == 0:
        print "\r No updates for your entries available"
    print "Done"
def update_database(self, add_nodes = None, insert_only = False, update_only = False): """ Checks if there are updates available for all entries. Updates will be retrieved from the resource specified in the database. All resources will be searched for new entries, which will be inserted if available. Additional resources can be specified via add_nodes. add_nodes: Single or List of node-instances (nodes.Node) """ # counter to identify which entry is currently processed counter = 0 # counter to count available updates count_updates = 0 # list of database - nodes which are currently in the local database dbnodes = [] # create an instance with all available vamdc-nodes nl = nodes.Nodelist() # attach additional nodes to the list of dbnodes (for insert) if not functions.isiterable(add_nodes): add_nodes = [add_nodes] for node in add_nodes: if node is None: pass elif not isinstance(node, nodes.Node): print "Could not attach node. Wrong type, it should be type <nodes.Node>" else: dbnodes.append(node) #-------------------------------------------------------------------- # Check if updates are available for entries # Get list of species in the database cursor = self.conn.cursor() cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() num_rows = len(rows) query = q.Query() result = results.Result() if not insert_only: print("----------------------------------------------------------") print "Looking for updates" print("----------------------------------------------------------") for row in rows: counter += 1 print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]), try: node = nl.getnode(str(row[4])) except: node = None if node is None: print " -- RESOURCE NOT AVAILABLE" continue else: if node not in dbnodes: dbnodes.append(node) vamdcspeciesid = row[2] query_string = "SELECT ALL WHERE SpeciesID=%s" % row[1][6:] query.set_query(query_string) 
query.set_node(node) result.set_query(query) try: changedate = result.getChangeDate() except: changedate = None tstamp = parser.parse(row[3] + " GMT") if changedate is None: print " -- UNKNOWN (Could not retrieve information)" continue if tstamp < changedate: print " -- UPDATE AVAILABLE " count_updates += 1 print " -- PERFORM UPDATE -- " query_string = "SELECT SPECIES WHERE SpeciesID=%s" % row[1][6:] query.set_query(query_string) query.set_node(node) result.set_query(query) result.do_query() result.populate_model() insert_species_data(result.data['Molecules'], update = True) print " -- UPDATE DONE -- " else: print " -- up to date" if count_updates == 0: print "\r No updates for your entries available" print "Done" else: cursor.execute("SELECT distinct PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() for row in rows: try: node = nl.getnode(str(row[0])) except: node = None if node is None: print " -- RESOURCE NOT AVAILABLE" continue else: if node not in dbnodes: dbnodes.append(node) if update_only: return # Check if there are new entries available #--------------------------------------------------------- # Check all dbnodes for new species counter = 0 insert_molecules_list = [] for node in dbnodes: print("----------------------------------------------------------") print "Query '{dbname}' for new species ".format(dbname=node.name) print("----------------------------------------------------------") node.get_species() for id in node.Molecules: try: cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_Timestamp FROM Partitionfunctions WHERE PF_SpeciesID=?", [(id)]) exist = cursor.fetchone() if exist is None: print " %s" % node.Molecules[id] insert_molecules_list.append(node.Molecules[id]) counter += 1 except Exception, e: print e print id print "There are %d new species available" % counter print("----------------------------------------------------------") print "Start insert" 
print("----------------------------------------------------------") self.insert_species_data(insert_molecules_list, node) print("----------------------------------------------------------") print "Done"
def insert_species_data(self, species, node, update=False):
    """
    Inserts new species into the local database

    species: species which will be inserted
    node:    vamdc-node / type: instance(nodes.node)
    update:  if True then all entries in the local database with the same
             species-id will be deleted before the insert is performed.
    """
    # create a list of names. New names have not to be in that list
    names_black_list = []

    cursor = self.conn.cursor()
    cursor.execute("SELECT PF_Name FROM Partitionfunctions")
    rows = cursor.fetchall()
    for row in rows:
        names_black_list.append(row[0])

    #----------------------------------------------------------
    # Create a list of species for which transitions will be
    # retrieved and inserted in the database.
    # Species have to be in the Partitionfunctions - table
    if not functions.isiterable(species):
        species = [species]

    #--------------------------------------------------------------
    for specie in species:
        num_transitions = {}
        # will contain a list of names which belong to one specie
        species_names = {}
        # list will contain species whose insert-failed
        species_with_error = []

        # check if specie is of type Molecule
        if isinstance(specie, specmodel.Molecule):
            speciesid = specie.SpeciesID
            vamdcspeciesid = specie.VAMDCSpeciesID
            formula = specie.OrdinaryStructuralFormula
        else:
            # NOTE(review): this branch looks defective — isinstance/len
            # cannot raise, so the except (and its 'continue') is dead code,
            # and for inputs that are not a 27-char string, speciesid and
            # vamdcspeciesid stay undefined and cause a NameError below.
            try:
                if isinstance(specie, str) and len(specie) == 27:
                    vamdcspeciesid = specie
                    speciesid = None
            except:
                print "Specie is not of wrong type"
                print "Type Molecule or string (Inchikey) is allowed"
                continue

        if speciesid:
            print "Processing: {speciesid}".format(speciesid = speciesid)
        else:
            print "Processing: {vamdcspeciesid}".format(vamdcspeciesid = vamdcspeciesid)

        try:
            # Create query string
            query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid
            query = q.Query()
            result = results.Result()
            # Get data from the database
            query.set_query(query_string)
            query.set_node(node)
            result.set_query(query)
            result.do_query()
            result.populate_model()
        except:
            print " -- Error: Could not fetch and process data"
            continue
        #---------------------------------------
        cursor = self.conn.cursor()
        cursor.execute('BEGIN TRANSACTION')
        #------------------------------------------------------------------------------------------------------
        # if update is allowed then all entries in the database for the given species-id will be
        # deleted, and thus replaced by the new data
        if update:
            cursor.execute("SELECT PF_Name FROM Partitionfunctions WHERE PF_SpeciesID = ?", (speciesid, ))
            rows = cursor.fetchall()
            for row in rows:
                # old names become available again for the fresh insert
                names_black_list.remove(row[0])
                cursor.execute("DELETE FROM Transitions WHERE T_Name = ?", (row[0], ))
                cursor.execute("DELETE FROM Partitionfunctions WHERE PF_Name = ?", (row[0], ))
        #------------------------------------------------------------------------------------------------------

        #------------------------------------------------------------------------------------------------------
        # Insert all transitions
        num_transitions_found = len(result.data['RadiativeTransitions'])
        counter_transitions = 0
        for trans in result.data['RadiativeTransitions']:
            counter_transitions += 1
            # trailing comma: overwrite the same console line (py2 print)
            print "\r insert transition %d of %d" % (counter_transitions, num_transitions_found),
            # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid).
            # Insert transitions only if they belong to the correct specie
            if result.data['RadiativeTransitions'][trans].SpeciesID == speciesid or speciesid is None:
                id = str(result.data['RadiativeTransitions'][trans].SpeciesID)

                # if an error has occured already then there will be no further insert
                if id in species_with_error:
                    continue

                formula = str(result.data['Molecules'][id].OrdinaryStructuralFormula)

                # Get upper and lower state from the states table
                try:
                    upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef]
                    lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef]
                except (KeyError, AttributeError):
                    print " -- Error: State is missing"
                    species_with_error.append(id)
                    continue

                # Get string which identifies the vibrational states involved in the transition
                t_state = self.getvibstatelabel(upper_state, lower_state)

                # Get hyperfinestructure info if hfsInfo is None
                # only then the hfsInfo has not been inserted in the species name
                # (there can be multiple values in the complete dataset
                t_hfs = ''
                try:
                    for pc in result.data['RadiativeTransitions'][trans].ProcessClass:
                        if str(pc)[:3] == 'hyp':
                            t_hfs = str(pc)
                except Exception, e:
                    # NOTE(review): the comma makes py2 print the tuple-style
                    # output "Error: %s <e>" — the %s is never substituted.
                    print "Error: %s", e

                t_name = "%s; %s; %s" % (formula, t_state, t_hfs)
                t_name = t_name.strip()
                # check if name is in the list of forbidden names and add counter if so
                i = 1
                while t_name in names_black_list:
                    t_name = "%s#%d" % (t_name.split('#')[0], i)
                    i += 1

                # update list of distinct species names.
                if id in species_names:
                    if not t_name in species_names[id]:
                        species_names[id].append(t_name)
                        num_transitions[t_name] = 0
                else:
                    species_names[id] = [t_name]
                    num_transitions[t_name] = 0

                frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue)
                try:
                    uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy)
                except TypeError:
                    print " -- Error uncertainty not available"
                    species_with_error.append(id)
                    continue

                # Get statistical weight if present
                try:
                    weight = int(upper_state.TotalStatisticalWeight)
                except:
                    print " -- Error statistical weight not available"
                    species_with_error.append(id)
                    continue

                # Get nuclear spin isomer (ortho/para) if present
                #print "%s; %s" % (result.data['RadiativeTransitions'][trans].Id, upper_state.Id)
                try:
                    nsiName = upper_state.NuclearSpinIsomerName
                except AttributeError:
                    nsiName = None

                # Insert transition into database
                try:
                    cursor.execute("""INSERT INTO Transitions (
                        T_Name,
                        T_Frequency,
                        T_EinsteinA,
                        T_Uncertainty,
                        T_EnergyLower,
                        T_UpperStateDegeneracy,
                        T_HFS,
                        T_UpperStateQuantumNumbers,
                        T_LowerStateQuantumNumbers) VALUES (?, ?,?,?,?, ?,?, ?,?)""",
                                   (t_name,
                                    "%lf" % frequency,
                                    "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA),
                                    uncertainty,
                                    "%lf" % float(lower_state.StateEnergyValue),
                                    weight,
                                    #upper_state.QuantumNumbers.case,
                                    t_hfs,
                                    str(upper_state.QuantumNumbers.qn_string),
                                    str(lower_state.QuantumNumbers.qn_string),
                                    ))
                    num_transitions[t_name] += 1
                except Exception, e:
                    print "Transition has not been inserted:\n Error: %s" % e
def insert_radiativetransitions(self, species, node):
    """
    Retrieves radiative transitions for the given species from the vamdc
    node and re-inserts them into the local Transitions table (existing
    rows for each species name are deleted first).

    species: species-id(s) or vamdc-species-id(s); single value or iterable
    node:    vamdc-node / type: instance(nodes.node)
    """
    # will contain a list of names which belong to one specie
    species_names = {}
    #----------------------------------------------------------
    # Create a list of species for which transitions will be
    # retrieved and inserted in the database.
    # Species have to be in the Partitionfunctions - table
    if not functions.isiterable(species):
        species = [species]

    species_list = []
    cursor = self.conn.cursor()
    for specie in species:
        cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_HFS FROM Partitionfunctions WHERE PF_SpeciesID=? or PF_VamdcSpeciesID=?", (specie, specie))
        rows = cursor.fetchall()
        for row in rows:
            species_list.append([row[0], row[1], row[2], row[3]])

    #--------------------------------------------------------------
    for specie in species_list:
        num_transitions = {}
        #------------------------------------
        # Retrieve data from the database
        id = specie[1]
        vamdcspeciesid = specie[2]
        hfs = specie[3]
        name = specie[0]

        # name should be formated like 'formula; state-info; hfs-info'
        name_array = name.split(';')
        formula = name_array[0].strip()
        try:
            stateInfo = name_array[1].strip()
        except:
            stateInfo = ''
        # get hfs-flag from the name.
        try:
            hfsInfo = name_array[2].strip()
        except:
            hfsInfo = ''

        # Create query string
        query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid
        if hfs is not None and hfs.strip() != '':
            query_string += " and RadTransCode='%s'" % hfs
        query = q.Query()
        result = results.Result()

        # Get data from the database
        query.set_query(query_string)
        query.set_node(node)
        result.set_query(query)
        result.do_query()
        result.populate_model()
        #---------------------------------------
        cursor = self.conn.cursor()
        cursor.execute('BEGIN TRANSACTION')
        # replace the stored transitions for this name with fresh data
        cursor.execute("DELETE FROM Transitions WHERE T_Name = ?", (name,))

        for trans in result.data['RadiativeTransitions']:
            # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid).
            # Insert transitions only if they belong to the correct specie
            if result.data['RadiativeTransitions'][trans].SpeciesID == id:
                # Get upper and lower state from the states table
                upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef]
                lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef]

                # Get string which identifies the vibrational states involved in the transition
                try:
                    if upper_state.QuantumNumbers.vibstate == lower_state.QuantumNumbers.vibstate:
                        t_state = str(upper_state.QuantumNumbers.vibstate).strip()
                    else:
                        # Build a combined label over all vibrational quantum
                        # numbers that appear in either state; missing values
                        # default to 0.
                        #vup = upper_state.QuantumNumbers.vibstate.split(",")
                        #vlow = lower_state.QuantumNumbers.vibstate.split(",")
                        v_dict = {}
                        for label in list(set(upper_state.QuantumNumbers.qn_dict.keys() + lower_state.QuantumNumbers.qn_dict.keys())):
                            if isVibrationalStateLabel(label):
                                try:
                                    value_up = upper_state.QuantumNumbers.qn_dict[label]
                                except:
                                    value_up = 0
                                try:
                                    value_low = lower_state.QuantumNumbers.qn_dict[label]
                                except:
                                    value_low = 0
                                v_dict[label] = [value_up, value_low]
                        v_string = ''
                        valup_string = ''
                        vallow_string = ''
                        for v in v_dict:
                            v_string += "%s," % v
                            valup_string += "%s," % v_dict[v][0]
                            vallow_string += "%s," % v_dict[v][1]
                        # parenthesize only when more than one label is involved
                        if len(v_dict) > 1:
                            t_state = "(%s)=(%s)-(%s)" % (v_string[:-1], valup_string[:-1], vallow_string[:-1])
                        else:
                            t_state = "%s=%s-%s" % (v_string[:-1], valup_string[:-1], vallow_string[:-1])
                        #t_state = '(%s)-(%s)' % (upper_state.QuantumNumbers.vibstate,lower_state.QuantumNumbers.vibstate)
                except:
                    t_state = ''

                # go to the next transition if state does not match
                if t_state != stateInfo and stateInfo is not None and stateInfo != '':
                    continue

                # Get hyperfinestructure info if hfsInfo is None
                # only then the hfsInfo has not been inserted in the species name
                # (there can be multiple values in the complete dataset
                if hfsInfo == '':
                    t_hfs = ''
                    try:
                        for pc in result.data['RadiativeTransitions'][trans].ProcessClass:
                            if str(pc)[:3] == 'hyp':
                                t_hfs = str(pc)
                    except Exception, e:
                        # NOTE(review): the comma makes py2 print the tuple-style
                        # output "Error: %s <e>" — the %s is never substituted.
                        print "Error: %s", e
                else:
                    t_hfs = hfsInfo

                # if hfs is not None and empty then only Transitions without hfs-flag
                # should be processed
                if hfs is not None and hfs != t_hfs:
                    continue

                t_name = "%s; %s; %s" % (formula, t_state, t_hfs)
                t_name = t_name.strip()
                # update list of distinct species names.
                if id in species_names:
                    if not t_name in species_names[id]:
                        species_names[id].append(t_name)
                        num_transitions[t_name] = 0
                else:
                    species_names[id] = [t_name]
                    num_transitions[t_name] = 0

                frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue)
                uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy)

                # Get statistical weight if present
                if upper_state.TotalStatisticalWeight:
                    weight = int(upper_state.TotalStatisticalWeight)
                else:
                    weight = None

                # Get nuclear spin isomer (ortho/para) if present
                try:
                    nsiName = upper_state.NuclearSpinIsomerName
                except AttributeError:
                    nsiName = None

                # Insert transition into database
                try:
                    cursor.execute("""INSERT INTO Transitions (
                        T_Name,
                        T_Frequency,
                        T_EinsteinA,
                        T_Uncertainty,
                        T_EnergyLower,
                        T_UpperStateDegeneracy,
                        T_HFS,
                        T_UpperStateQuantumNumbers,
                        T_LowerStateQuantumNumbers) VALUES (?, ?,?,?,?, ?,?, ?,?)""",
                                   (t_name,
                                    "%lf" % frequency,
                                    "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA),
                                    uncertainty,
                                    "%lf" % float(lower_state.StateEnergyValue),
                                    weight,
                                    #upper_state.QuantumNumbers.case,
                                    t_hfs,
                                    str(upper_state.QuantumNumbers.qn_string),
                                    str(lower_state.QuantumNumbers.qn_string),
                                    ))
                    num_transitions[t_name] += 1
                except Exception, e:
                    print "Transition has not been inserted:\n Error: %s" % e
def _fuzzy_search_r(self, node, word, pos, result, result_list, k, alrdy_trans=False):
    '''recursive function to do search work
    explanation: this performs a fuzzy search for correct words that match the given word input

    :param node: current trienode
    :param word: remainder of current word. ends in chr0
    :param pos: position in trienode data
    :param result: current built-up result (i.e. correct word)
    :param result_list: pointer to list of results
    :param k: allowed edits (total remaining)
    :param alrdy_trans: only allow one transposition

    NOTE: 'result' is mutated in place along one search path; each branch
    below forks a fresh results.Result(result, ...) copy before recursing.
    '''
    #pdb.set_trace()
    if node.data[pos:] == word:
        # we found a match! Also we must be on a leaf, so nowhere else to go
        # (drop the trailing chr0 terminator from the stored data)
        result.data += node.data[pos:-1]
        result.metadata = node.metadata
        result_list.add(result)
        return
    # need to add current char to result
    result.data += node.data[pos]
    # for edits that advance along the trie, two cases depending on
    # if at end of current node or not:
    if pos >= (len(node.data)-1):  # if at end of node
        # advancing means descending into each child node at position 0
        for next_char, next_node in node.child_nodes.iteritems():
            if k > 0:
                # DELETION: trie advances, word does not; costs one edit
                new_result = results.Result(result, 'deletion')
                self._fuzzy_search_r(next_node, word, 0, new_result, result_list, k-1)
            # SUBSTITUTION: both advance over mismatching chars; one edit
            if len(word) > 0 and node.data[pos] != word[0]:
                new_result = results.Result(result, 'substitution')
                self._fuzzy_search_r(next_node, word[1:], 0, new_result, result_list, k-1)
            # CORRECT: both advance over matching chars; free
            if len(word) > 0 and node.data[pos] == word[0]:
                new_result = results.Result(result, 'continue')
                self._fuzzy_search_r(next_node, word[1:], 0, new_result, result_list, k)
    else:  # if not at end of node
        # advancing means stepping within this node's data (pos+1)
        if k > 0:
            # DELETION:
            new_result = results.Result(result, 'deletion')
            self._fuzzy_search_r(node, word, pos+1, new_result, result_list, k-1)
        # SUBSTITUTION:
        if len(word) > 0 and node.data[pos] != word[0]:
            new_result = results.Result(result, 'substitution')
            self._fuzzy_search_r(node, word[1:], pos+1, new_result, result_list, k-1)
        # CORRECT:
        if len(word) > 0 and node.data[pos] == word[0]:
            new_result = results.Result(result, 'continue')
            self._fuzzy_search_r(node, word[1:], pos+1, new_result, result_list, k)
    if k > 0:
        # INSERTION - doesn't step forward so doesn't matter which if
        # we are at the end of a node or not
        # (undo the char appended above, since the trie does not advance)
        new_result = results.Result(result, 'insertion')
        new_result.data = new_result.data[:-1]
        self._fuzzy_search_r(node, word[1:], pos, new_result, result_list, k-1)
        # TRANSPOSITION - same:
        if not alrdy_trans and len(word) >= 2:
            #TODO: why only allow one?
            new_word = word[1] + word[0] + word[2:]
            new_result = results.Result(result, 'transposition')
            new_result.data = new_result.data[:-1]
            self._fuzzy_search_r(node, new_word, pos, new_result, result_list, k-1, alrdy_trans=True)
def test_single_result():
    """A single assigned field shows up in keys() and via attribute access."""
    record = results.Result()
    record.first_name = "Abc"
    # keys() reflects exactly the one assigned field
    assert list(record.keys()) == ["first_name"]
    # and the value round-trips through attribute access
    assert record.first_name == "Abc"
    assert hasattr(record, "first_name")
# Save result as a .json file.
# The dataset name is taken from the 6th path component above the
# tfrecords directory.
dataset = os.path.basename(
    os.path.dirname(
        os.path.dirname(
            os.path.dirname(
                os.path.dirname(os.path.dirname(args.tfrecords_dir))))))

# BUGFIX: the variable was named 'round', shadowing the builtin round().
round_num = 1
if args.round is not None:
    round_num = args.round

# Single-component input uses one trace, otherwise all three components.
if cfg.component_N == False:
    n_traces = 1
else:
    n_traces = 3

result = results.Result(cfg.window_size, n_traces, cfg.n_clusters, cfg.model,
                        truePositives, falsePositives, trueNegatives,
                        falseNegatives, accuracy, precision, recall, f1,
                        locationAccuracy, round_num, dataset)

# Timestamp as YYYYMMDDHHMMSS (strftime replaces the manual zfill chain).
now_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

# NOTE(review): the filename uses cfg.n_traces, not the locally computed
# n_traces above — confirm this is intended.
filename = ("output/eval_" + now_str + "_" + str(round_num) + "_" + dataset +
            "_" + str(cfg.window_size) + "_" + str(cfg.n_clusters) + "_" +
            str(cfg.n_traces) + "_" + cfg.model + ".json")
result.export_json(filename)

#if args.redirect_stdout_stderr:
#    stdout_stderr_file.close()
def test_get_confusion_matrix(self):
    """ytrue=[0,1], ypred=[0,0] puts both predictions in column 0."""
    result = results.Result(*range(6), ytrue=[0, 1], ypred=[0, 0], src={})
    cm = result.get_confusion_matrix()
    expected = np.array([[1, 0], [1, 0]])
    self.assertTrue(np.all(cm == expected))