def insert_species_data(self, species, node, update=False): """ Inserts new species into the local database species: species which will be inserted node: vamdc-node / type: instance(nodes.node) update: if True then all entries in the local database with the same species-id will be deleted before the insert is performed. """ # create a list of names. New names have not to be in that list names_black_list = [] cursor = self.conn.cursor() cursor.execute("SELECT PF_Name FROM Partitionfunctions") rows = cursor.fetchall() for row in rows: names_black_list.append(row[0]) #---------------------------------------------------------- # Create a list of species for which transitions will be # retrieved and inserted in the database. # Species have to be in the Partitionfunctions - table if not functions.isiterable(species): species = [species] #-------------------------------------------------------------- for specie in species: num_transitions = {} # will contain a list of names which belong to one specie species_names = {} # list will contain species whose insert-failed species_with_error = [] # check if specie is of type Molecule if isinstance(specie, specmodel.Molecule): speciesid = specie.SpeciesID vamdcspeciesid = specie.VAMDCSpeciesID formula = specie.OrdinaryStructuralFormula else: try: if isinstance(specie, str) and len(specie) == 27: vamdcspeciesid = specie speciesid = None except: print "Specie is not of wrong type" print "Type Molecule or string (Inchikey) is allowed" continue if speciesid: print "Processing: {speciesid}".format(speciesid = speciesid) else: print "Processing: {vamdcspeciesid}".format(vamdcspeciesid = vamdcspeciesid) try: # Create query string query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid query = q.Query() result = results.Result() # Get data from the database query.set_query(query_string) query.set_node(node) result.set_query(query) result.do_query() result.populate_model() except: print " -- Error: Could not fetch and process data" continue #--------------------------------------- cursor = self.conn.cursor() cursor.execute('BEGIN TRANSACTION') #------------------------------------------------------------------------------------------------------ # if update is allowed then all entries in the database for the given species-id will be # deleted, and thus replaced by the new data if update: cursor.execute("SELECT PF_Name FROM Partitionfunctions WHERE PF_SpeciesID = ?", (speciesid, )) rows = cursor.fetchall() for row in rows: names_black_list.remove(row[0]) cursor.execute("DELETE FROM Transitions WHERE T_Name = ?", (row[0], )) cursor.execute("DELETE FROM Partitionfunctions WHERE PF_Name = ?", (row[0], )) #------------------------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------------------------ # Insert all transitions num_transitions_found = len(result.data['RadiativeTransitions']) counter_transitions = 0 for trans in result.data['RadiativeTransitions']: counter_transitions+=1 print "\r insert transition %d of %d" % (counter_transitions, num_transitions_found), # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid). # Insert transitions only if they belong to the correct specie if result.data['RadiativeTransitions'][trans].SpeciesID == speciesid or speciesid is None: id = str(result.data['RadiativeTransitions'][trans].SpeciesID) # if an error has occured already then there will be no further insert if id in species_with_error: continue formula = str(result.data['Molecules'][id].OrdinaryStructuralFormula) # Get upper and lower state from the states table try: upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef] lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef] except (KeyError, AttributeError): print " -- Error: State is missing" species_with_error.append(id) continue # Get string which identifies the vibrational states involved in the transition t_state = self.getvibstatelabel(upper_state, lower_state) # Get hyperfinestructure info if hfsInfo is None # only then the hfsInfo has not been inserted in the species name # (there can be multiple values in the complete dataset t_hfs = '' try: for pc in result.data['RadiativeTransitions'][trans].ProcessClass: if str(pc)[:3] == 'hyp': t_hfs = str(pc) except Exception, e: print "Error: %s", e t_name = "%s; %s; %s" % (formula, t_state, t_hfs) t_name = t_name.strip() # check if name is in the list of forbidden names and add counter if so i = 1 while t_name in names_black_list: t_name = "%s#%d" % (t_name.split('#')[0], i) i += 1 # update list of distinct species names. if id in species_names: if not t_name in species_names[id]: species_names[id].append(t_name) num_transitions[t_name] = 0 else: species_names[id] = [t_name] num_transitions[t_name] = 0 frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue) try: uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy) except TypeError: print " -- Error uncertainty not available" species_with_error.append(id) continue # Get statistical weight if present try: weight = int(upper_state.TotalStatisticalWeight) except: print " -- Error statistical weight not available" species_with_error.append(id) continue # Get nuclear spin isomer (ortho/para) if present #print "%s; %s" % (result.data['RadiativeTransitions'][trans].Id, upper_state.Id) try: nsiName = upper_state.NuclearSpinIsomerName except AttributeError: nsiName = None # Insert transition into database try: cursor.execute("""INSERT INTO Transitions ( T_Name, T_Frequency, T_EinsteinA, T_Uncertainty, T_EnergyLower, T_UpperStateDegeneracy, T_HFS, T_UpperStateQuantumNumbers, T_LowerStateQuantumNumbers) VALUES (?, ?,?,?,?, ?,?, ?,?)""", (t_name, "%lf" % frequency, "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA), uncertainty, "%lf" % float(lower_state.StateEnergyValue), weight, #upper_state.QuantumNumbers.case, t_hfs, str(upper_state.QuantumNumbers.qn_string), str(lower_state.QuantumNumbers.qn_string), )) num_transitions[t_name] += 1 except Exception, e: print "Transition has not been inserted:\n Error: %s" % e
def update_database(self, add_nodes = None, insert_only = False, update_only = False): """ Checks if there are updates available for all entries. Updates will be retrieved from the resource specified in the database. All resources will be searched for new entries, which will be inserted if available. Additional resources can be specified via add_nodes. add_nodes: Single or List of node-instances (nodes.Node) """ # counter to identify which entry is currently processed counter = 0 # counter to count available updates count_updates = 0 # list of database - nodes which are currently in the local database dbnodes = [] # create an instance with all available vamdc-nodes nl = nodes.Nodelist() # attach additional nodes to the list of dbnodes (for insert) if not functions.isiterable(add_nodes): add_nodes = [add_nodes] for node in add_nodes: if node is None: pass elif not isinstance(node, nodes.Node): print "Could not attach node. Wrong type, it should be type <nodes.Node>" else: dbnodes.append(node) #-------------------------------------------------------------------- # Check if updates are available for entries # Get list of species in the database cursor = self.conn.cursor() cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() num_rows = len(rows) query = q.Query() result = results.Result() if not insert_only: print("----------------------------------------------------------") print "Looking for updates" print("----------------------------------------------------------") for row in rows: counter += 1 print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]), try: node = nl.getnode(str(row[4])) except: node = None if node is None: print " -- RESOURCE NOT AVAILABLE" continue else: if node not in dbnodes: dbnodes.append(node) vamdcspeciesid = row[2] query_string = "SELECT ALL WHERE SpeciesID=%s" % row[1][6:] query.set_query(query_string) query.set_node(node) result.set_query(query) try: changedate = result.getChangeDate() except: changedate = None tstamp = parser.parse(row[3] + " GMT") if changedate is None: print " -- UNKNOWN (Could not retrieve information)" continue if tstamp < changedate: print " -- UPDATE AVAILABLE " count_updates += 1 print " -- PERFORM UPDATE -- " query_string = "SELECT SPECIES WHERE SpeciesID=%s" % row[1][6:] query.set_query(query_string) query.set_node(node) result.set_query(query) result.do_query() result.populate_model() insert_species_data(result.data['Molecules'], update = True) print " -- UPDATE DONE -- " else: print " -- up to date" if count_updates == 0: print "\r No updates for your entries available" print "Done" else: cursor.execute("SELECT distinct PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() for row in rows: try: node = nl.getnode(str(row[0])) except: node = None if node is None: print " -- RESOURCE NOT AVAILABLE" continue else: if node not in dbnodes: dbnodes.append(node) if update_only: return # Check if there are new entries available #--------------------------------------------------------- # Check all dbnodes for new species counter = 0 insert_molecules_list = [] for node in dbnodes: print("----------------------------------------------------------") print "Query '{dbname}' for new species ".format(dbname=node.name) print("----------------------------------------------------------") node.get_species() for id in node.Molecules: try: cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_Timestamp FROM Partitionfunctions WHERE PF_SpeciesID=?", [(id)]) exist = cursor.fetchone() if exist is None: print " %s" % node.Molecules[id] insert_molecules_list.append(node.Molecules[id]) counter += 1 except Exception, e: print e print id print "There are %d new species available" % counter print("----------------------------------------------------------") print "Start insert" print("----------------------------------------------------------") self.insert_species_data(insert_molecules_list, node) print("----------------------------------------------------------") print "Done"
def insert_radiativetransitions(self, species, node): """ """ # will contain a list of names which belong to one specie species_names = {} #---------------------------------------------------------- # Create a list of species for which transitions will be # retrieved and inserted in the database. # Species have to be in the Partitionfunctions - table if not functions.isiterable(species): species = [species] species_list = [] cursor = self.conn.cursor() for specie in species: cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_HFS FROM Partitionfunctions WHERE PF_SpeciesID=? or PF_VamdcSpeciesID=?", (specie, specie)) rows = cursor.fetchall() for row in rows: species_list.append([row[0], row[1], row[2], row[3]]) #-------------------------------------------------------------- for specie in species_list: num_transitions = {} #------------------------------------ # Retrieve data from the database id = specie[1] vamdcspeciesid = specie[2] hfs = specie[3] name = specie[0] # name should be formated like 'formula; state-info; hfs-info' name_array = name.split(';') formula = name_array[0].strip() try: stateInfo = name_array[1].strip() except: stateInfo = '' # get hfs-flag from the name. try: hfsInfo = name_array[2].strip() except: hfsInfo = '' # Create query string query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid if hfs is not None and hfs.strip() != '': query_string += " and RadTransCode='%s'" % hfs query = q.Query() result = results.Result() # Get data from the database query.set_query(query_string) query.set_node(node) result.set_query(query) result.do_query() result.populate_model() #--------------------------------------- cursor = self.conn.cursor() cursor.execute('BEGIN TRANSACTION') cursor.execute("DELETE FROM Transitions WHERE T_Name = ?", (name,)) for trans in result.data['RadiativeTransitions']: # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid). # Insert transitions only if they belong to the correct specie if result.data['RadiativeTransitions'][trans].SpeciesID == id: # Get upper and lower state from the states table upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef] lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef] # Get string which identifies the vibrational states involved in the transition try: if upper_state.QuantumNumbers.vibstate == lower_state.QuantumNumbers.vibstate: t_state = str(upper_state.QuantumNumbers.vibstate).strip() else: #vup = upper_state.QuantumNumbers.vibstate.split(",") #vlow = lower_state.QuantumNumbers.vibstate.split(",") v_dict = {} for label in list(set(upper_state.QuantumNumbers.qn_dict.keys() + lower_state.QuantumNumbers.qn_dict.keys())): if isVibrationalStateLabel(label): try: value_up = upper_state.QuantumNumbers.qn_dict[label] except: value_up = 0 try: value_low = lower_state.QuantumNumbers.qn_dict[label] except: value_low = 0 v_dict[label] = [value_up, value_low] v_string = '' valup_string = '' vallow_string = '' for v in v_dict: v_string += "%s," % v valup_string += "%s," % v_dict[v][0] vallow_string += "%s," % v_dict[v][1] if len(v_dict) > 1: t_state = "(%s)=(%s)-(%s)" % (v_string[:-1], valup_string[:-1], vallow_string[:-1]) else: t_state = "%s=%s-%s" % (v_string[:-1], valup_string[:-1], vallow_string[:-1]) #t_state = '(%s)-(%s)' % (upper_state.QuantumNumbers.vibstate,lower_state.QuantumNumbers.vibstate) except: t_state = '' # go to the next transition if state does not match if t_state != stateInfo and stateInfo is not None and stateInfo != '': continue # Get hyperfinestructure info if hfsInfo is None # only then the hfsInfo has not been inserted in the species name # (there can be multiple values in the complete dataset if hfsInfo == '': t_hfs = '' try: for pc in result.data['RadiativeTransitions'][trans].ProcessClass: if str(pc)[:3] == 'hyp': t_hfs = str(pc) except Exception, e: print "Error: %s", e else: t_hfs = hfsInfo # if hfs is not None and empty then only Transitions without hfs-flag # should be processed if hfs is not None and hfs != t_hfs: continue t_name = "%s; %s; %s" % (formula, t_state, t_hfs) t_name = t_name.strip() # update list of distinct species names. if id in species_names: if not t_name in species_names[id]: species_names[id].append(t_name) num_transitions[t_name] = 0 else: species_names[id] = [t_name] num_transitions[t_name] = 0 frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue) uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy) # Get statistical weight if present if upper_state.TotalStatisticalWeight: weight = int(upper_state.TotalStatisticalWeight) else: weight = None # Get nuclear spin isomer (ortho/para) if present try: nsiName = upper_state.NuclearSpinIsomerName except AttributeError: nsiName = None # Insert transition into database try: cursor.execute("""INSERT INTO Transitions ( T_Name, T_Frequency, T_EinsteinA, T_Uncertainty, T_EnergyLower, T_UpperStateDegeneracy, T_HFS, T_UpperStateQuantumNumbers, T_LowerStateQuantumNumbers) VALUES (?, ?,?,?,?, ?,?, ?,?)""", (t_name, "%lf" % frequency, "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA), uncertainty, "%lf" % float(lower_state.StateEnergyValue), weight, #upper_state.QuantumNumbers.case, t_hfs, str(upper_state.QuantumNumbers.qn_string), str(lower_state.QuantumNumbers.qn_string), )) num_transitions[t_name] += 1 except Exception, e: print "Transition has not been inserted:\n Error: %s" % e
def update_database(self, add_nodes = None, insert_only = False, update_only = False): """ Checks if there are updates available for all entries. Updates will be retrieved from the resource specified in the database. All resources will be searched for new entries, which will be inserted if available. Additional resources can be specified via add_nodes. add_nodes: Single or List of node-instances (nodes.Node) """ # counter to identify which entry is currently processed counter = 0 # counter to count available updates count_updates = 0 # list of database - nodes which are currently in the local database dbnodes = [] # create an instance with all available vamdc-nodes nl = nodes.Nodelist() # attach additional nodes to the list of dbnodes (for insert) if not functions.isiterable(add_nodes): add_nodes = [add_nodes] for node in add_nodes: if node is None: pass elif not isinstance(node, nodes.Node): print "Could not attach node. Wrong type, it should be type <nodes.Node>" else: dbnodes.append(node) #-------------------------------------------------------------------- # Check if updates are available for entries # Get list of species in the database cursor = self.conn.cursor() cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() num_rows = len(rows) query = q.Query() request = r.Request() if not insert_only: print("----------------------------------------------------------") print "Looking for updates" print("----------------------------------------------------------") for row in rows: counter += 1 print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]), try: node = nl.getnode(str(row[4])) except: node = None if node is None: print " -- RESOURCE NOT AVAILABLE" continue else: if node not in dbnodes: dbnodes.append(node) vamdcspeciesid = row[2] # Currently the database prefix XCDMS- or XJPL- has to be removed speciesid = row[1].split("-")[1] query_string = "SELECT ALL WHERE SpeciesID=%s" % speciesid request.setnode(node) request.setquery(query_string) errorcode = None try: changedate = request.getlastmodified() # except r.TimeOutError, e: # errorcode = e.strerror # changedate = None except Exception, e: errorcode = e.strerror changedate = None tstamp = parser.parse(row[3] + " GMT") if changedate is None: if errorcode is None: errorcode = "UNKNOWN" print " -- %s (Could not retrieve information)" % errorcode continue if tstamp < changedate: print " -- UPDATE AVAILABLE " count_updates += 1 print " -- PERFORM UPDATE -- " query_string = "SELECT SPECIES WHERE SpeciesID=%s" % speciesid request.setquery(query_string) result = request.dorequest() try: result.populate_model() except: print " Error: Could not process data " continue try: self.insert_species_data(result.data['Molecules'], node, update = True) except: print " Error: Could not update data " continue print " -- UPDATE DONE -- " else: print " -- up to date" if count_updates == 0: print "\r No updates for your entries available" print "Done"
def update_database(self, add_nodes = None, insert_only = False, update_only = False, delete_archived = False): """ Checks if there are updates available for all entries. Updates will be retrieved from the resource specified in the database. All resources will be searched for new entries, which will be inserted if available. Additional resources can be specified via add_nodes. :ivar nodes.Node add_nodes: Single or List of node-instances. :ivar boolean insert_only: Just insert new species and skip updates if True :ivar boolean update_only: Just updates species and skip inserts if True """ # counter to identify which entry is currently processed counter = 0 # counter to count available updates count_updates = 0 # list of database - nodes which are currently in the local database dbnodes = [] # create an instance with all available vamdc-nodes nl = nodes.Nodelist() # attach additional nodes to the list of dbnodes (for insert) if not functions.isiterable(add_nodes): add_nodes = [add_nodes] for node in add_nodes: if node is None: pass elif not isinstance(node, nodes.Node): print "Could not attach node. Wrong type, it should be type <nodes.Node>" else: dbnodes.append(node) #-------------------------------------------------------------------- # Check if updates are available for entries # Get list of species in the database cursor = self.conn.cursor() cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() num_rows = len(rows) query = q.Query() request = r.Request() if not insert_only: print("----------------------------------------------------------") print "Looking for updates" print("----------------------------------------------------------") for row in rows: counter += 1 print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]), try: node = nl.getnode(str(row[4])) except: node = None if node is None: print " -- RESOURCE NOT AVAILABLE" continue else: if node not in dbnodes: dbnodes.append(node) vamdcspeciesid = row[2] # Currently the database prefix XCDMS- or XJPL- has to be removed speciesid = row[1].split("-")[1] query_string = "SELECT ALL WHERE SpeciesID=%s" % speciesid request.setnode(node) request.setquery(query_string) errorcode = None try: changedate = request.getlastmodified() except r.NoContentError, e: # Delete entries which are not available anymore if request.status == 204: if delete_archived: print " -- ENTRY ARCHIVED AND WILL BE DELETED -- " del_specie = self.delete_species(row[1]) if len(del_specie) > 0: print "\r Done" else: print " -- ENTRY ARCHIVED -- " continue except r.TimeOutError, e: # errorcode = e.strerror # changedate = None print " -- TIMEOUT: Could not check entry -- " continue except:
def update_database(self, add_nodes = None, insert_only = False, update_only = False, delete_archived = False): """ Checks if there are updates available for all entries. Updates will be retrieved from the resource specified in the database. All resources will be searched for new entries, which will be inserted if available. Additional resources can be specified via add_nodes. :ivar nodes.Node add_nodes: Single or List of node-instances. :ivar boolean insert_only: Just insert new species and skip updates if True :ivar boolean update_only: Just updates species and skip inserts if True """ # counter to identify which entry is currently processed counter = 0 # counter to count available updates count_updates = 0 # list of database - nodes which are currently in the local database dbnodes = [] # create an instance with all available vamdc-nodes nl = nodes.Nodelist() # attach additional nodes to the list of dbnodes (for insert) if not functions.isiterable(add_nodes): add_nodes = [add_nodes] for node in add_nodes: if node is None: pass elif not isinstance(node, nodes.Node): print("Could not attach node. Wrong type, it should be type <nodes.Node>") else: dbnodes.append(node) #-------------------------------------------------------------------- # Check if updates are available for entries # Get list of species in the database cursor = self.conn.cursor() cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() num_rows = len(rows) query = q.Query() request = r.Request() if not insert_only: print("----------------------------------------------------------") print("Looking for updates") print("----------------------------------------------------------") for row in rows: counter += 1 print("%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]), end=' ') try: node = nl.getnode(str(row[4])) except: node = None if node is None: print(" -- RESOURCE NOT AVAILABLE") continue else: if node not in dbnodes: dbnodes.append(node) vamdcspeciesid = row[2] # Currently the database prefix XCDMS- or XJPL- has to be removed speciesid = row[1].split("-")[1] query_string = "SELECT ALL WHERE SpeciesID=%s" % speciesid request.setnode(node) request.setquery(query_string) errorcode = None try: changedate = request.getlastmodified() except r.NoContentError as e: # Delete entries which are not available anymore if request.status == 204: if delete_archived: print(" -- ENTRY ARCHIVED AND WILL BE DELETED -- ") del_specie = self.delete_species(row[1]) if len(del_specie) > 0: print("\r Done") else: print(" -- ENTRY ARCHIVED -- ") continue except r.TimeOutError as e: # errorcode = e.strerror # changedate = None print(" -- TIMEOUT: Could not check entry -- ") continue except: changedate = None print("Could not retrieve information - Unexpected error:", sys.exc_info()[0]) continue tstamp = parser.parse(row[3] + " GMT") if changedate is None: if errorcode is None: errorcode = "UNKNOWN" print(" -- %s (Could not retrieve information)" % errorcode) continue if tstamp < changedate: print(" -- UPDATE AVAILABLE ") count_updates += 1 print(" -- PERFORM UPDATE -- ") query_string = "SELECT SPECIES WHERE SpeciesID=%s" % speciesid request.setquery(query_string) result = request.dorequest() try: result.populate_model() except: print(" Error: Could not process data ") continue try: self.insert_species_data(result.data['Molecules'], node, update = True) except: print(" Error: Could not update data ") continue print(" -- UPDATE DONE -- ") else: print(" -- up to date") if count_updates == 0: print("\r No updates for your entries available") print("Done") else: cursor.execute("SELECT distinct PF_ResourceID FROM Partitionfunctions ") rows = cursor.fetchall() for row in rows: try: node = nl.getnode(str(row[0])) except: node = None if node is None: print(" -- RESOURCE NOT AVAILABLE") continue else: if node not in dbnodes: dbnodes.append(node) if update_only: return # Check if there are new entries available #--------------------------------------------------------- # Check all dbnodes for new species for node in dbnodes: counter = 0 insert_molecules_list = [] print("----------------------------------------------------------") print("Query '{dbname}' for new species ".format(dbname=node.name)) print("----------------------------------------------------------") request.setnode(node) result = request.getspecies() for id in result.data['Molecules']: try: cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_Timestamp FROM Partitionfunctions WHERE PF_SpeciesID=?", [(id)]) exist = cursor.fetchone() if exist is None: print(" %s" % result.data['Molecules'][id]) insert_molecules_list.append(result.data['Molecules'][id]) counter += 1 except Exception as e: print(e) print(id) print("There are %d new species available" % counter) print("----------------------------------------------------------") print("Start insert") print("----------------------------------------------------------") self.insert_species_data(insert_molecules_list, node) print("----------------------------------------------------------") print("Done")
def insert_species_data(self, species, node, update=False): """ Checks the VAMDC database node for new species and inserts them into the local database :ivar list species: species which will be inserted :ivar nodes.Node node: vamdc-node / type: instance(nodes.node) :ivar boolean update: if True then all entries in the local database with the same species-id will be deleted before the insert is performed. """ # create a list of names. New names have not to be in that list names_black_list = [] cursor = self.conn.cursor() cursor.execute("SELECT PF_Name FROM Partitionfunctions") rows = cursor.fetchall() for row in rows: names_black_list.append(row[0]) #---------------------------------------------------------- # Create a list of species for which transitions will be # retrieved and inserted in the database. # Species have to be in the Partitionfunctions - table if not functions.isiterable(species): species = [species] #-------------------------------------------------------------- for specie in species: # if species is a dictionary (e.g. specmodel.Molecules) # then get the species-instance instead of only the key. if isinstance(species, dict): specie = species[specie] num_transitions = {} # will contain a list of names which belong to one specie species_names = {} # list will contain species whose insert-failed species_with_error = [] # check if specie is of type Molecule if isinstance(specie, specmodel.Molecule): speciesid = specie.SpeciesID vamdcspeciesid = specie.VAMDCSpeciesID formula = specie.OrdinaryStructuralFormula if isinstance(specie, specmodel.Atom): speciesid = specie.SpeciesID vamdcspeciesid = specie.VAMDCSpeciesID # formula = specie.OrdinaryStructuralFormula else: try: if isinstance(specie, str) and len(specie) == 27: vamdcspeciesid = specie speciesid = None except: print("Specie is not of wrong type") print("Type Molecule or string (Inchikey) is allowed") continue if speciesid: print("Processing: {speciesid}".format(speciesid = speciesid)) else: print("Processing: {vamdcspeciesid}".format(vamdcspeciesid = vamdcspeciesid)) try: # Create query string query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid request = r.Request() # Get data from the database request.setnode(node) request.setquery(query_string) result = request.dorequest() #result.populate_model() except Exception as e: print(" -- Error %s: Could not fetch and process data" % e.strerror) continue #--------------------------------------- cursor = self.conn.cursor() cursor.execute('BEGIN TRANSACTION') #------------------------------------------------------------------------------------------------------ # if update is allowed then all entries in the database for the given species-id will be # deleted, and thus replaced by the new data if update: if speciesid is None: for sid in list(result.data['Molecules'].keys()) + list(result.data['Atoms'].keys()): deleted_species = self.delete_species(sid) for ds in deleted_species: names_black_list.remove(ds) else: deleted_species = self.delete_species(speciesid) for ds in deleted_species: names_black_list.remove(ds) #------------------------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------------------------ # Insert all transitions num_transitions_found = len(result.data['RadiativeTransitions']) counter_transitions = 0 for trans in result.data['RadiativeTransitions']: counter_transitions+=1 print("\r insert transition %d of %d" % (counter_transitions, num_transitions_found), end=' ') # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid). # Insert transitions only if they belong to the correct specie if result.data['RadiativeTransitions'][trans].SpeciesID == speciesid or speciesid is None: id = str(result.data['RadiativeTransitions'][trans].SpeciesID) # if an error has occured already then there will be no further insert if id in species_with_error: continue # Get upper and lower state from the states table try: upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef] lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef] except (KeyError, AttributeError): print(" -- Error: State is missing") species_with_error.append(id) continue if id in result.data['Atoms'].keys(): is_atom = True is_molecule = False atomname = self.createatomname(result.data['Atoms'][id]) elif id in result.data['Molecules'].keys(): is_atom = False is_molecule = True formula = str(result.data['Molecules'][id].OrdinaryStructuralFormula) # Get string which identifies the vibrational states involved in the transition t_state = self.getvibstatelabel(upper_state, lower_state) else: continue # Get hyperfinestructure info if hfsInfo is None # only then the hfsInfo has not been inserted in the species name # (there can be multiple values in the complete dataset t_hfs = '' try: for pc in result.data['RadiativeTransitions'][trans].ProcessClass: if str(pc)[:3] == 'hyp': t_hfs = str(pc) except Exception as e: print("Error: %s", e) frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue) try: uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy) except TypeError: print(" -- Error uncertainty not available") species_with_error.append(id) continue # Get statistical weight if present try: weight = int(upper_state.TotalStatisticalWeight) except: print(" -- Error statistical weight not available") species_with_error.append(id) continue # Get nuclear spin isomer (ortho/para) if present try: nsiName = upper_state.NuclearSpinIsomerName except AttributeError: nsiName = None # if nuclear spin isomer is defined then two entries have to be generated if nsiName is not None and nsiName != '': nsinames = [nsiName, None] nsiStateOrigin = result.data['States']["%s" % upper_state.NuclearSpinIsomerLowestEnergy] nsiEnergyOffset = float(nsiStateOrigin.StateEnergyValue) else: nsinames = [None] for nsiName in nsinames: # create name if is_atom == True: t_name = atomname else: t_affix = ";".join([affix for affix in [t_hfs, nsiName] if affix is not None and affix!='']) t_name = "%s;%s;%s" % (formula, t_state, t_affix) t_name = t_name.strip() # remove all blanks in the name t_name = t_name.replace(' ','') # check if name is in the list of forbidden names and add counter if so i = 1 while t_name in names_black_list: t_name = "%s#%d" % (t_name.split('#')[0], i) i += 1 # update list of distinct species names. if id in species_names: if not t_name in species_names[id]: species_names[id].append(t_name) num_transitions[t_name] = 0 else: species_names[id] = [t_name] num_transitions[t_name] = 0 if nsiName is not None: lowerStateEnergy = float(lower_state.StateEnergyValue) - nsiEnergyOffset else: lowerStateEnergy = float(lower_state.StateEnergyValue) # Insert transition into database try: cursor.execute("""INSERT INTO Transitions ( T_Name, T_Frequency, T_EinsteinA, T_Uncertainty, T_EnergyLower, T_UpperStateDegeneracy, T_HFS, T_UpperStateQuantumNumbers, T_LowerStateQuantumNumbers) VALUES (?, ?,?,?,?, ?,?, ?,?)""", (t_name, "%lf" % frequency, "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA), uncertainty, "%lf" % lowerStateEnergy, weight, #upper_state.QuantumNumbers.case, t_hfs, str(upper_state.QuantumNumbers.qn_string), str(lower_state.QuantumNumbers.qn_string), )) num_transitions[t_name] += 1 except Exception as e: print("Transition has not been inserted:\n Error: %s" % e) print("\n") #------------------------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------------------------ # delete transitions for all entries where an error occured during the insert for id in species_with_error: print(" -- Species {id} has not been inserted due to an error ".format(id=str(id))) try: for name in species_names[id]: cursor.execute("DELETE FROM Transitions WHERE T_Name=?", (str(name),)) print(" -- {name} ".format(name=str(name))) except: pass #------------------------------------------------------------------------------------------------------ # insert specie in Partitionfunctions (header) table if node: resourceID = node.identifier url = node.url else: resourceID = 'NULL' url = 'NULL' # Insert molecules for id in species_names: if id in species_with_error: continue for name in species_names[id]: # determine hyperfine-structure affix and nuclear spin isomer affix try: hfs = '' nsi = '' for affix in name.split("#")[0].split(';',2)[2].split(";"): if affix.strip()[:3] == 'hyp': hfs = affix.strip() else: # if affix does not identify hyperfine structure # it identifies the nuclear spin isomer nsi = affix.strip() except: hfs = '' # Insert row in partitionfunctions try: if id in result.data['Atoms']: if 'Comment' not in result.data['Atoms'][id].__dict__: result.data['Atoms'][id].Comment = "" cursor.execute("INSERT INTO Partitionfunctions (PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_Comment, PF_ResourceID, PF_URL, PF_Timestamp) VALUES (?,?,?,?,?,?,?)", ("%s" % name, id, "%s" % (result.data['Atoms'][id].VAMDCSpeciesID), "%s" % (result.data['Atoms'][id].Comment), resourceID, "%s%s%s" % (url, "sync?LANG=VSS2&REQUEST=doQuery&FORMAT=XSAMS&QUERY=Select+*+where+SpeciesID%3D", id), datetime.now(), )) else: cursor.execute("INSERT INTO Partitionfunctions (PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_HFS, PF_NuclearSpinIsomer, PF_Comment, PF_ResourceID, PF_URL, PF_Timestamp) VALUES (?,?,?,?,?,?,?,?,?)", ("%s" % name, id, "%s" % (result.data['Molecules'][id].VAMDCSpeciesID), hfs, nsi, "%s" % (result.data['Molecules'][id].Comment), resourceID, "%s%s%s" % (url, "sync?LANG=VSS2&REQUEST=doQuery&FORMAT=XSAMS&QUERY=Select+*+where+SpeciesID%3D", id), datetime.now(), )) except sqlite3.Error as e: print("An error occurred:", e.args[0]) except Exception as e: print("An error occurred:", e.args[0]) print(list(result.data['Molecules'].keys())) # Update Partitionfunctions if id in result.data['Atoms'].keys(): for temperature in Temperatures: pf_values = specmodel.calculate_partitionfunction(result.data['States'], temperature = temperature) try: field = ("PF_%.3lf" % float(temperature)).replace('.', '_') sql = "UPDATE Partitionfunctions SET %s=? WHERE PF_SpeciesID=? " % field cursor.execute(sql, (pf_values[id], id)) except Exception as e: print("SQL-Error: %s " % sql) print(pf_value, id) print("Error: %d: %s" % (e.args[0], e.args[1])) else: try: for pfs in result.data['Molecules'][id].PartitionFunction: if 'NuclearSpinIsomer' not in pfs.__dict__: nsi = '' else: nsi = pfs.NuclearSpinIsomer for temperature in pfs.values.keys(): try: field = ("PF_%.3lf" % float(temperature)).replace('.', '_') sql = "UPDATE Partitionfunctions SET %s=? WHERE PF_SpeciesID=? AND IFNULL(PF_NuclearSpinIsomer,'')=?" % field cursor.execute(sql, (pfs.values[temperature], id, nsi)) except Exception as e: print("SQL-Error: %s " % sql) print(pfs.values[temperature], id) print("Error: %d: %s" % (e.args[0], e.args[1])) except: pass #------------------------------------------------------------------------------------------------------ for row in num_transitions: print(" for %s inserted %d transitions" % (row, num_transitions[row])) self.conn.commit() cursor.close()