Esempio n. 1
0
    def insert_species_data(self, species, node, update=False):
        """
        Inserts new species into the local database

        species: species which will be inserted
        node:    vamdc-node / type: instance(nodes.node)
        update:  if True then all entries in the local database with the same
                 species-id will be deleted before the insert is performed.
        """

        # create a list of names. New names have not to be in that list
        names_black_list = []
        
        cursor = self.conn.cursor()
        cursor.execute("SELECT PF_Name FROM Partitionfunctions")
        rows = cursor.fetchall()
        for row in rows:
            names_black_list.append(row[0])

        #----------------------------------------------------------
        # Create a list of species for which transitions will be
        # retrieved and inserted in the database.
        # Species have to be in the Partitionfunctions - table

        if not functions.isiterable(species):
            species = [species]

        #--------------------------------------------------------------

        for specie in species:
            num_transitions = {}
            # will contain a list of names which belong to one specie
            species_names = {}
            # list will contain species whose insert-failed
            species_with_error = []

            # check if specie is of type Molecule
            if isinstance(specie, specmodel.Molecule):
                speciesid = specie.SpeciesID
                vamdcspeciesid = specie.VAMDCSpeciesID
                formula = specie.OrdinaryStructuralFormula
            else:
                try:
                    if isinstance(specie, str) and len(specie) == 27:
                        vamdcspeciesid = specie
                        speciesid = None
                except:
                    print "Specie is not of wrong type"
                    print "Type Molecule or string (Inchikey) is allowed"
                    continue
            if speciesid:
                print "Processing: {speciesid}".format(speciesid = speciesid)
            else:
                print "Processing: {vamdcspeciesid}".format(vamdcspeciesid = vamdcspeciesid)
                

            try:
                # Create query string
                query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid
                query = q.Query()
                result = results.Result()

                # Get data from the database
                query.set_query(query_string)
                query.set_node(node)

                result.set_query(query)
                result.do_query()
                result.populate_model()
            except:
                print " -- Error: Could not fetch and process data"
                continue    
            #---------------------------------------

            cursor = self.conn.cursor()
            cursor.execute('BEGIN TRANSACTION')

            #------------------------------------------------------------------------------------------------------
            # if update is allowed then all entries in the database for the given species-id will be
            # deleted, and thus replaced by the new data
            if update:
                cursor.execute("SELECT PF_Name FROM Partitionfunctions WHERE PF_SpeciesID = ?", (speciesid, ))
                rows = cursor.fetchall()
                for row in rows:
                    names_black_list.remove(row[0])
                    cursor.execute("DELETE FROM Transitions WHERE T_Name = ?", (row[0], ))
                    cursor.execute("DELETE FROM Partitionfunctions WHERE PF_Name = ?", (row[0], ))
            #------------------------------------------------------------------------------------------------------
            
            #------------------------------------------------------------------------------------------------------
            # Insert all transitions
            num_transitions_found = len(result.data['RadiativeTransitions'])
            counter_transitions = 0
            for trans in result.data['RadiativeTransitions']:
                counter_transitions+=1
                print "\r insert transition %d of %d" % (counter_transitions, num_transitions_found),
                # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid).
                # Insert transitions only if they belong to the correct specie

                if result.data['RadiativeTransitions'][trans].SpeciesID == speciesid or speciesid is None:
                    id = str(result.data['RadiativeTransitions'][trans].SpeciesID)
                    # if an error has occured already then there will be no further insert
                    if id in species_with_error:
                        continue
                    
                    formula = str(result.data['Molecules'][id].OrdinaryStructuralFormula)
                    # Get upper and lower state from the states table
                    try:
                        upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef]
                        lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef]
                    except (KeyError, AttributeError):
                        print " -- Error: State is missing"
                        species_with_error.append(id)
                        continue
                        
                    # Get string which identifies the vibrational states involved in the transition
                    t_state = self.getvibstatelabel(upper_state, lower_state)

                    # Get hyperfinestructure info if hfsInfo is None
                    # only then the hfsInfo has not been inserted in the species name
                    # (there can be multiple values in the complete dataset
                    t_hfs = ''
                    try:
                        for pc in result.data['RadiativeTransitions'][trans].ProcessClass:
                            if str(pc)[:3] == 'hyp':
                                t_hfs = str(pc)
                    except Exception, e:
                            print "Error: %s", e

                    t_name = "%s; %s; %s" % (formula, t_state, t_hfs)
                    t_name = t_name.strip()
                    # check if name is in the list of forbidden names and add counter if so
                    i = 1
                    while t_name in names_black_list:
                        t_name = "%s#%d" % (t_name.split('#')[0], i)
                        i += 1
                    # update list of distinct species names.
                    if id in species_names:
                        if not t_name in species_names[id]:
                            species_names[id].append(t_name)
                            num_transitions[t_name] = 0
                    else:
                        species_names[id] = [t_name]
                        num_transitions[t_name] = 0

                    frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue)
                    try:
                        uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy)
                    except TypeError:
                        print " -- Error uncertainty not available"
                        species_with_error.append(id)
                        continue

                    # Get statistical weight if present
                    try:
                        weight = int(upper_state.TotalStatisticalWeight)
                    except:
                        print " -- Error statistical weight not available"
                        species_with_error.append(id)
                        continue
                        
                    # Get nuclear spin isomer (ortho/para) if present
                    #print "%s; %s" % (result.data['RadiativeTransitions'][trans].Id, upper_state.Id)
                    try:
                        nsiName = upper_state.NuclearSpinIsomerName
                    except AttributeError:
                        nsiName = None

                    # Insert transition into database
                    try:
                        cursor.execute("""INSERT INTO Transitions (
                        T_Name,
                        T_Frequency,
                        T_EinsteinA,
                        T_Uncertainty,
                        T_EnergyLower,
                        T_UpperStateDegeneracy,
                        T_HFS,
                        T_UpperStateQuantumNumbers,
                        T_LowerStateQuantumNumbers) VALUES
                        (?, ?,?,?,?, ?,?, ?,?)""",
                                       (t_name,
                                        "%lf" % frequency,
                                        "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA),
                                        uncertainty, "%lf" % float(lower_state.StateEnergyValue),
                                        weight,
                                        #upper_state.QuantumNumbers.case,
                                        t_hfs,
                                        str(upper_state.QuantumNumbers.qn_string),
                                        str(lower_state.QuantumNumbers.qn_string),
                                        ))
                        num_transitions[t_name] += 1
                    except Exception, e:
                        print "Transition has not been inserted:\n Error: %s" % e
Esempio n. 2
0
    def update_database(self, add_nodes = None, insert_only = False, update_only = False):
        """
        Checks if there are updates available for all entries. Updates will
        be retrieved from the resource specified in the database.
        All resources will be searched for new entries, which will be inserted
        if available. Additional resources can be specified via add_nodes.

        add_nodes: Single or List of node-instances (nodes.Node)
        """
        # counter to identify which entry is currently processed
        counter = 0
        # counter to count available updates
        count_updates = 0
        # list of database - nodes which are currently in the local database
        dbnodes = []
        # create an instance with all available vamdc-nodes
        nl = nodes.Nodelist()

        # attach additional nodes to the list of dbnodes (for insert)
        if not functions.isiterable(add_nodes):
            add_nodes = [add_nodes]
        for node in add_nodes:
            if node is None:
                pass
            elif not isinstance(node, nodes.Node):
                print "Could not attach node. Wrong type, it should be type <nodes.Node>"
            else:
                dbnodes.append(node)
        
        #--------------------------------------------------------------------
        # Check if updates are available for entries

        # Get list of species in the database
        cursor = self.conn.cursor()
        cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ")
        rows = cursor.fetchall()
        num_rows = len(rows)
        query = q.Query()
        result = results.Result()

        if not insert_only:
            print("----------------------------------------------------------")
            print "Looking for updates"
            print("----------------------------------------------------------")

            for row in rows:
                counter += 1
                print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]),
                try:
                    node = nl.getnode(str(row[4]))
                except:
                    node = None
                if node is None:
                    print " -- RESOURCE NOT AVAILABLE"
                    continue
                else:
                    if node not in dbnodes:
                        dbnodes.append(node)

                vamdcspeciesid = row[2]
                query_string = "SELECT ALL WHERE SpeciesID=%s" % row[1][6:]
                query.set_query(query_string)
                query.set_node(node)

                result.set_query(query)
                try:
                    changedate = result.getChangeDate()
                except:
                    changedate = None

                tstamp = parser.parse(row[3] + " GMT")
                if changedate is None:
                    print " -- UNKNOWN (Could not retrieve information)"
                    continue
                if tstamp < changedate:
                    print " -- UPDATE AVAILABLE "
                    count_updates += 1
                    print " -- PERFORM UPDATE -- "
                    query_string = "SELECT SPECIES WHERE SpeciesID=%s" % row[1][6:]
                    query.set_query(query_string)
                    query.set_node(node)

                    result.set_query(query)
                    result.do_query()
                    result.populate_model()
                    insert_species_data(result.data['Molecules'], update = True)
                    print " -- UPDATE DONE    -- "
                else:
                    print " -- up to date"

            if count_updates == 0:
                print "\r No updates for your entries available"
            print "Done"
        else:
            cursor.execute("SELECT distinct PF_ResourceID FROM Partitionfunctions ")
            rows = cursor.fetchall()
            for row in rows:
                try:
                    node = nl.getnode(str(row[0]))
                except:
                    node = None
                if node is None:
                    print " -- RESOURCE NOT AVAILABLE"
                    continue
                else:
                    if node not in dbnodes:
                        dbnodes.append(node)


        if update_only:
            return
        
        # Check if there are new entries available

        #---------------------------------------------------------
        # Check all dbnodes for new species
        counter = 0
        insert_molecules_list = []
        for node in dbnodes:
            print("----------------------------------------------------------")
            print "Query '{dbname}' for new species ".format(dbname=node.name)
            print("----------------------------------------------------------")
            node.get_species()
            for id in node.Molecules:
                try:
                    cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_Timestamp FROM Partitionfunctions WHERE PF_SpeciesID=?", [(id)])
                    exist = cursor.fetchone()
                    if exist is None:
                        print "   %s" % node.Molecules[id]
                        insert_molecules_list.append(node.Molecules[id])
                        counter += 1
                except Exception, e:
                    print e
                    print id
            print "There are %d new species available" % counter
            print("----------------------------------------------------------")
            print "Start insert"
            print("----------------------------------------------------------")           
            self.insert_species_data(insert_molecules_list, node)
            print("----------------------------------------------------------")           
            print "Done"
Esempio n. 3
0
    def insert_radiativetransitions(self, species, node):
        """
        """
        # will contain a list of names which belong to one specie
        species_names = {}

        #----------------------------------------------------------
        # Create a list of species for which transitions will be
        # retrieved and inserted in the database.
        # Species have to be in the Partitionfunctions - table

        if not functions.isiterable(species):
            species = [species]

        species_list = []

        cursor = self.conn.cursor()
        for specie in species:
            cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_HFS FROM Partitionfunctions WHERE PF_SpeciesID=? or PF_VamdcSpeciesID=?",
                           (specie, specie))
            rows = cursor.fetchall()
            for row in rows:
                species_list.append([row[0], row[1], row[2], row[3]])

        #--------------------------------------------------------------

        for specie in species_list:
            num_transitions = {}

            #------------------------------------
            # Retrieve data from the database

            id = specie[1]
            vamdcspeciesid = specie[2]
            hfs = specie[3]
            name = specie[0]
            # name should be formated like 'formula; state-info; hfs-info'
            name_array = name.split(';')

            formula = name_array[0].strip()
            try:
                stateInfo = name_array[1].strip()
            except:
                stateInfo = ''

            # get hfs-flag from the name.
            try:
                hfsInfo = name_array[2].strip()
            except:
                hfsInfo = ''

            # Create query string
            query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid
            if hfs is not None and hfs.strip() != '':
                query_string += " and RadTransCode='%s'" % hfs
            query = q.Query()
            result = results.Result()

            # Get data from the database
            query.set_query(query_string)
            query.set_node(node)

            result.set_query(query)
            result.do_query()
            result.populate_model()
            #---------------------------------------

            cursor = self.conn.cursor()
            cursor.execute('BEGIN TRANSACTION')

            cursor.execute("DELETE FROM Transitions WHERE T_Name = ?", (name,))

            for trans in result.data['RadiativeTransitions']:
                # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid).
                # Insert transitions only if they belong to the correct specie

                if result.data['RadiativeTransitions'][trans].SpeciesID == id:

                    # Get upper and lower state from the states table
                    upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef]
                    lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef]

                    # Get string which identifies the vibrational states involved in the transition
                    try:
                        if upper_state.QuantumNumbers.vibstate == lower_state.QuantumNumbers.vibstate:
                            t_state = str(upper_state.QuantumNumbers.vibstate).strip()
                        else:
                            #vup = upper_state.QuantumNumbers.vibstate.split(",")
                            #vlow = lower_state.QuantumNumbers.vibstate.split(",")
                            v_dict = {}
                            for label in list(set(upper_state.QuantumNumbers.qn_dict.keys() + lower_state.QuantumNumbers.qn_dict.keys())):
                                if isVibrationalStateLabel(label):
                                    try:
                                        value_up = upper_state.QuantumNumbers.qn_dict[label]
                                    except:
                                        value_up = 0
                                    try:
                                        value_low = lower_state.QuantumNumbers.qn_dict[label]
                                    except:
                                        value_low = 0
                                    v_dict[label] = [value_up, value_low]
                            v_string = ''
                            valup_string = ''
                            vallow_string = ''
                            for v in v_dict:
                                v_string += "%s," % v
                                valup_string += "%s," % v_dict[v][0]
                                vallow_string += "%s," % v_dict[v][1]
                            if len(v_dict) > 1:
                                t_state = "(%s)=(%s)-(%s)" % (v_string[:-1], valup_string[:-1], vallow_string[:-1])
                            else:
                                t_state = "%s=%s-%s" % (v_string[:-1], valup_string[:-1], vallow_string[:-1])

                            #t_state = '(%s)-(%s)' % (upper_state.QuantumNumbers.vibstate,lower_state.QuantumNumbers.vibstate)
                    except:
                        t_state = ''

                    # go to the next transition if state does not match
                    if t_state != stateInfo and stateInfo is not None and stateInfo != '':
                        continue

                    # Get hyperfinestructure info if hfsInfo is None
                    # only then the hfsInfo has not been inserted in the species name
                    # (there can be multiple values in the complete dataset
                    if hfsInfo == '':
                        t_hfs = ''
                        try:
                            for pc in result.data['RadiativeTransitions'][trans].ProcessClass:
                                if str(pc)[:3] == 'hyp':
                                    t_hfs = str(pc)
                        except Exception, e:
                            print "Error: %s", e
                    else:
                        t_hfs = hfsInfo

                    # if hfs is not None and empty then only Transitions without hfs-flag
                    # should be processed
                    if hfs is not None and hfs != t_hfs:
                        continue

                    t_name = "%s; %s; %s" % (formula, t_state, t_hfs)
                    t_name = t_name.strip()
                    # update list of distinct species names.
                    if id in species_names:
                        if not t_name in species_names[id]:
                            species_names[id].append(t_name)
                            num_transitions[t_name] = 0
                    else:
                        species_names[id] = [t_name]
                        num_transitions[t_name] = 0

                    frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue)
                    uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy)

                    # Get statistical weight if present
                    if upper_state.TotalStatisticalWeight:
                        weight = int(upper_state.TotalStatisticalWeight)
                    else:
                        weight = None

                    # Get nuclear spin isomer (ortho/para) if present
                    try:
                        nsiName = upper_state.NuclearSpinIsomerName
                    except AttributeError:
                        nsiName = None

                    # Insert transition into database
                    try:
                        cursor.execute("""INSERT INTO Transitions (
                        T_Name,
                        T_Frequency,
                        T_EinsteinA,
                        T_Uncertainty,
                        T_EnergyLower,
                        T_UpperStateDegeneracy,
                        T_HFS,
                        T_UpperStateQuantumNumbers,
                        T_LowerStateQuantumNumbers) VALUES
                        (?, ?,?,?,?, ?,?, ?,?)""",
                                       (t_name,
                                        "%lf" % frequency,
                                        "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA),
                                        uncertainty, "%lf" % float(lower_state.StateEnergyValue),
                                        weight,
                                        #upper_state.QuantumNumbers.case,
                                        t_hfs,
                                        str(upper_state.QuantumNumbers.qn_string),
                                        str(lower_state.QuantumNumbers.qn_string),
                                        ))
                        num_transitions[t_name] += 1
                    except Exception, e:
                        print "Transition has not been inserted:\n Error: %s" % e
Esempio n. 4
0
    def update_database(self, add_nodes = None, insert_only = False, update_only = False):
        """
        Checks if there are updates available for all entries. Updates will
        be retrieved from the resource specified in the database.
        All resources will be searched for new entries, which will be inserted
        if available. Additional resources can be specified via add_nodes.

        add_nodes: Single or List of node-instances (nodes.Node)
        """
        # counter to identify which entry is currently processed
        counter = 0
        # counter to count available updates
        count_updates = 0
        # list of database - nodes which are currently in the local database
        dbnodes = []
        # create an instance with all available vamdc-nodes
        nl = nodes.Nodelist()

        # attach additional nodes to the list of dbnodes (for insert)
        if not functions.isiterable(add_nodes):
            add_nodes = [add_nodes]
        for node in add_nodes:
            if node is None:
                pass
            elif not isinstance(node, nodes.Node):
                print "Could not attach node. Wrong type, it should be type <nodes.Node>"
            else:
                dbnodes.append(node)
        
        #--------------------------------------------------------------------
        # Check if updates are available for entries

        # Get list of species in the database
        cursor = self.conn.cursor()
        cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ")
        rows = cursor.fetchall()
        num_rows = len(rows)
        query = q.Query()
        request = r.Request()

        if not insert_only:
            print("----------------------------------------------------------")
            print "Looking for updates"
            print("----------------------------------------------------------")

            for row in rows:
                counter += 1
                print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]),
                try:
                    node = nl.getnode(str(row[4]))
                except:
                    node = None
                if node is None:
                    print " -- RESOURCE NOT AVAILABLE"
                    continue
                else:
                    if node not in dbnodes:
                        dbnodes.append(node)

                vamdcspeciesid = row[2]
                # Currently the database prefix XCDMS- or XJPL- has to be removed
                speciesid = row[1].split("-")[1]
                query_string = "SELECT ALL WHERE SpeciesID=%s" % speciesid
                request.setnode(node)
                request.setquery(query_string)

                errorcode = None
                try:
                    changedate = request.getlastmodified()
#                except r.TimeOutError, e:
#                    errorcode = e.strerror
#                    changedate = None
                except Exception, e:
                    errorcode = e.strerror
                    changedate = None

                tstamp = parser.parse(row[3] + " GMT")
                if changedate is None:
                    if errorcode is None:
                        errorcode = "UNKNOWN"
                    print " -- %s (Could not retrieve information)" % errorcode
                    continue
                if tstamp < changedate:
                    print " -- UPDATE AVAILABLE "
                    count_updates += 1
                    print " -- PERFORM UPDATE -- "
                    query_string = "SELECT SPECIES WHERE SpeciesID=%s" % speciesid
                    request.setquery(query_string)

                    result = request.dorequest()
                    try:
                        result.populate_model()
                    except:
                        print " Error: Could not process data "
                        continue
                    try:
                        self.insert_species_data(result.data['Molecules'], node, update = True)
                    except:
                        print " Error: Could not update data "
                        continue
                    print " -- UPDATE DONE    -- "
                else:
                    print " -- up to date"

            if count_updates == 0:
                print "\r No updates for your entries available"
            print "Done"
Esempio n. 5
0
    def update_database(self, add_nodes = None, insert_only = False, update_only = False, delete_archived = False):
        """
        Checks if there are updates available for all entries. Updates will
        be retrieved from the resource specified in the database.
        All resources will be searched for new entries, which will be inserted
        if available. Additional resources can be specified via add_nodes.

        :ivar nodes.Node add_nodes: Single or List of node-instances.
        :ivar boolean insert_only: Just insert new species and skip updates if True
        :ivar boolean update_only: Just updates species and skip inserts if True
        """
        # counter to identify which entry is currently processed
        counter = 0
        # counter to count available updates
        count_updates = 0
        # list of database - nodes which are currently in the local database
        dbnodes = []
        # create an instance with all available vamdc-nodes
        nl = nodes.Nodelist()

        # attach additional nodes to the list of dbnodes (for insert)
        if not functions.isiterable(add_nodes):
            add_nodes = [add_nodes]
        for node in add_nodes:
            if node is None:
                pass
            elif not isinstance(node, nodes.Node):
                print "Could not attach node. Wrong type, it should be type <nodes.Node>"
            else:
                dbnodes.append(node)
        
        #--------------------------------------------------------------------
        # Check if updates are available for entries

        # Get list of species in the database
        cursor = self.conn.cursor()
        cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ")
        rows = cursor.fetchall()
        num_rows = len(rows)
        query = q.Query()
        request = r.Request()

        if not insert_only:
            print("----------------------------------------------------------")
            print "Looking for updates"
            print("----------------------------------------------------------")

            for row in rows:
                counter += 1
                print "%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]),
                try:
                    node = nl.getnode(str(row[4]))
                except:
                    node = None
                if node is None:
                    print " -- RESOURCE NOT AVAILABLE"
                    continue
                else:
                    if node not in dbnodes:
                        dbnodes.append(node)

                vamdcspeciesid = row[2]
                # Currently the database prefix XCDMS- or XJPL- has to be removed
                speciesid = row[1].split("-")[1]
                query_string = "SELECT ALL WHERE SpeciesID=%s" % speciesid
                request.setnode(node)
                request.setquery(query_string)

                errorcode = None
                try:
                    changedate = request.getlastmodified()
                except r.NoContentError, e:
                    # Delete entries which are not available anymore
                    if request.status == 204:
                        if delete_archived:
                            print " -- ENTRY ARCHIVED AND WILL BE DELETED -- "
                            del_specie = self.delete_species(row[1])
                            if len(del_specie) > 0:
                                print "\r Done"
                        else:
                            print " -- ENTRY ARCHIVED -- "
                        continue

                except r.TimeOutError, e:
#                    errorcode = e.strerror
#                    changedate = None
                    print " -- TIMEOUT: Could not check entry -- "
                    continue

                except:
Esempio n. 6
0
    def update_database(self, add_nodes = None, insert_only = False, update_only = False, delete_archived = False):
        """
        Checks if there are updates available for all entries. Updates will
        be retrieved from the resource specified in the database.
        All resources will be searched for new entries, which will be inserted
        if available. Additional resources can be specified via add_nodes.

        :ivar nodes.Node add_nodes: Single or List of node-instances.
        :ivar boolean insert_only: Just insert new species and skip updates if True
        :ivar boolean update_only: Just updates species and skip inserts if True
        """
        # counter to identify which entry is currently processed
        counter = 0
        # counter to count available updates
        count_updates = 0
        # list of database - nodes which are currently in the local database
        dbnodes = []
        # create an instance with all available vamdc-nodes
        nl = nodes.Nodelist()

        # attach additional nodes to the list of dbnodes (for insert)
        if not functions.isiterable(add_nodes):
            add_nodes = [add_nodes]
        for node in add_nodes:
            if node is None:
                pass
            elif not isinstance(node, nodes.Node):
                print("Could not attach node. Wrong type, it should be type <nodes.Node>")
            else:
                dbnodes.append(node)
        
        #--------------------------------------------------------------------
        # Check if updates are available for entries

        # Get list of species in the database
        cursor = self.conn.cursor()
        cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, datetime(PF_Timestamp), PF_ResourceID FROM Partitionfunctions ")
        rows = cursor.fetchall()
        num_rows = len(rows)
        query = q.Query()
        request = r.Request()

        if not insert_only:
            print("----------------------------------------------------------")
            print("Looking for updates")
            print("----------------------------------------------------------")

            for row in rows:
                counter += 1
                print("%5d/%5d: Check specie %-55s (%-15s): " % (counter, num_rows, row[0], row[1]), end=' ')
                try:
                    node = nl.getnode(str(row[4]))
                except:
                    node = None
                if node is None:
                    print(" -- RESOURCE NOT AVAILABLE")
                    continue
                else:
                    if node not in dbnodes:
                        dbnodes.append(node)

                vamdcspeciesid = row[2]
                # Currently the database prefix XCDMS- or XJPL- has to be removed
                speciesid = row[1].split("-")[1]
                query_string = "SELECT ALL WHERE SpeciesID=%s" % speciesid
                request.setnode(node)
                request.setquery(query_string)

                errorcode = None
                try:
                    changedate = request.getlastmodified()
                except r.NoContentError as e:
                    # Delete entries which are not available anymore
                    if request.status == 204:
                        if delete_archived:
                            print(" -- ENTRY ARCHIVED AND WILL BE DELETED -- ")
                            del_specie = self.delete_species(row[1])
                            if len(del_specie) > 0:
                                print("\r Done")
                        else:
                            print(" -- ENTRY ARCHIVED -- ")
                        continue

                except r.TimeOutError as e:
#                    errorcode = e.strerror
#                    changedate = None
                    print(" -- TIMEOUT: Could not check entry -- ")
                    continue

                except:
                    changedate = None
                    print("Could not retrieve information - Unexpected error:", sys.exc_info()[0])
                    continue

                tstamp = parser.parse(row[3] + " GMT")
                if changedate is None:
                    if errorcode is None:
                        errorcode = "UNKNOWN"
                    print(" -- %s (Could not retrieve information)" % errorcode)
                    continue
                if tstamp < changedate:
                    print(" -- UPDATE AVAILABLE ")
                    count_updates += 1
                    print(" -- PERFORM UPDATE -- ")
                    query_string = "SELECT SPECIES WHERE SpeciesID=%s" % speciesid
                    request.setquery(query_string)

                    result = request.dorequest()
                    try:
                        result.populate_model()
                    except:
                        print(" Error: Could not process data ")
                        continue
                    try:
                        self.insert_species_data(result.data['Molecules'], node, update = True)
                    except:
                        print(" Error: Could not update data ")
                        continue
                    print(" -- UPDATE DONE    -- ")
                else:
                    print(" -- up to date")

            if count_updates == 0:
                print("\r No updates for your entries available")
            print("Done")
        else:
            cursor.execute("SELECT distinct PF_ResourceID FROM Partitionfunctions ")
            rows = cursor.fetchall()
            for row in rows:
                try:
                    node = nl.getnode(str(row[0]))
                except:
                    node = None
                if node is None:
                    print(" -- RESOURCE NOT AVAILABLE")
                    continue
                else:
                    if node not in dbnodes:
                        dbnodes.append(node)


        if update_only:
            return
        
        # Check if there are new entries available

        #---------------------------------------------------------
        # Check all dbnodes for new species
        for node in dbnodes:
            counter = 0
            insert_molecules_list = []
            print("----------------------------------------------------------")
            print("Query '{dbname}' for new species ".format(dbname=node.name))
            print("----------------------------------------------------------")
            request.setnode(node)
            result = request.getspecies()
            for id in result.data['Molecules']:
                try:
                    cursor.execute("SELECT PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_Timestamp FROM Partitionfunctions WHERE PF_SpeciesID=?", [(id)])
                    exist = cursor.fetchone()
                    if exist is None:
                        print("   %s" % result.data['Molecules'][id])
                        insert_molecules_list.append(result.data['Molecules'][id])
                        counter += 1
                except Exception as e:
                    print(e)
                    print(id)
            print("There are %d new species available" % counter)
            print("----------------------------------------------------------")
            print("Start insert")
            print("----------------------------------------------------------")           
            self.insert_species_data(insert_molecules_list, node)
            print("----------------------------------------------------------")           
            print("Done")
Esempio n. 7
0
    def insert_species_data(self, species, node, update=False):
        """
        Checks the VAMDC database node for new species and inserts them into the local database

        :ivar list species: species which will be inserted
        :ivar nodes.Node node: vamdc-node / type: instance(nodes.node)
        :ivar boolean update:  if True then all entries in the local database with the same
                               species-id will be deleted before the insert is performed.
        """

        # create a list of names. New names have not to be in that list
        names_black_list = []
        
        cursor = self.conn.cursor()
        cursor.execute("SELECT PF_Name FROM Partitionfunctions")
        rows = cursor.fetchall()
        for row in rows:
            names_black_list.append(row[0])

        #----------------------------------------------------------
        # Create a list of species for which transitions will be
        # retrieved and inserted in the database.
        # Species have to be in the Partitionfunctions - table

        if not functions.isiterable(species):
            species = [species]

        #--------------------------------------------------------------

        for specie in species:
            # if species is a dictionary (e.g. specmodel.Molecules)
            # then get the species-instance instead of only the key.
            if isinstance(species, dict):
                specie = species[specie]
                
            num_transitions = {}
            # will contain a list of names which belong to one specie
            species_names = {}
            # list will contain species whose insert-failed
            species_with_error = []

            # check if specie is of type Molecule
            if isinstance(specie, specmodel.Molecule):
                speciesid = specie.SpeciesID
                vamdcspeciesid = specie.VAMDCSpeciesID
                formula = specie.OrdinaryStructuralFormula
            if isinstance(specie, specmodel.Atom):
                speciesid = specie.SpeciesID
                vamdcspeciesid = specie.VAMDCSpeciesID
#                formula = specie.OrdinaryStructuralFormula
            else:
                try:
                    if isinstance(specie, str) and len(specie) == 27:
                        vamdcspeciesid = specie
                        speciesid = None
                except:
                    print("Specie is not of wrong type")
                    print("Type Molecule or string (Inchikey) is allowed")
                    continue
            if speciesid:
                print("Processing: {speciesid}".format(speciesid = speciesid))
            else:
                print("Processing: {vamdcspeciesid}".format(vamdcspeciesid = vamdcspeciesid))
                

            try:
                # Create query string
                query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % vamdcspeciesid
                request = r.Request()

                # Get data from the database
                request.setnode(node)
                request.setquery(query_string)

                result = request.dorequest()
                #result.populate_model()
            except Exception as e:
                print(" -- Error %s: Could not fetch and process data" % e.strerror)
                continue    
            #---------------------------------------

            cursor = self.conn.cursor()
            cursor.execute('BEGIN TRANSACTION')

            #------------------------------------------------------------------------------------------------------
            # if update is allowed then all entries in the database for the given species-id will be
            # deleted, and thus replaced by the new data
            if update:
                if speciesid is None:
                    for sid in list(result.data['Molecules'].keys()) + list(result.data['Atoms'].keys()):
                        deleted_species = self.delete_species(sid)
                        for ds in deleted_species:
                            names_black_list.remove(ds)
                else:
                    deleted_species = self.delete_species(speciesid)
                    for ds in deleted_species:
                        names_black_list.remove(ds)

            #------------------------------------------------------------------------------------------------------
            
            #------------------------------------------------------------------------------------------------------
            # Insert all transitions
            num_transitions_found = len(result.data['RadiativeTransitions'])
            counter_transitions = 0
            for trans in result.data['RadiativeTransitions']:
                counter_transitions+=1
                print("\r insert transition %d of %d" % (counter_transitions, num_transitions_found), end=' ')
                # data might contain transitions for other species (if query is based on ichikey/vamdcspeciesid).
                # Insert transitions only if they belong to the correct specie

                if result.data['RadiativeTransitions'][trans].SpeciesID == speciesid or speciesid is None:
                    id = str(result.data['RadiativeTransitions'][trans].SpeciesID)
                    # if an error has occured already then there will be no further insert
                    if id in species_with_error:
                        continue

                    # Get upper and lower state from the states table
                    try:
                        upper_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].UpperStateRef]
                        lower_state = result.data['States']["%s" % result.data['RadiativeTransitions'][trans].LowerStateRef]
                    except (KeyError, AttributeError):
                        print(" -- Error: State is missing")
                        species_with_error.append(id)
                        continue

                    if id in result.data['Atoms'].keys():
                        is_atom = True
                        is_molecule = False
                        atomname = self.createatomname(result.data['Atoms'][id])
                    elif id in result.data['Molecules'].keys():
                        is_atom = False
                        is_molecule = True
                        formula = str(result.data['Molecules'][id].OrdinaryStructuralFormula)

                        # Get string which identifies the vibrational states involved in the transition
                        t_state = self.getvibstatelabel(upper_state, lower_state)
                        
                    else:
                        continue
                                                
                    # Get hyperfinestructure info if hfsInfo is None
                    # only then the hfsInfo has not been inserted in the species name
                    # (there can be multiple values in the complete dataset
                    t_hfs = ''
                    try:
                        for pc in result.data['RadiativeTransitions'][trans].ProcessClass:
                            if str(pc)[:3] == 'hyp':
                                t_hfs = str(pc)
                    except Exception as e:
                            print("Error: %s", e)

                    frequency = float(result.data['RadiativeTransitions'][trans].FrequencyValue)
                    try:
                        uncertainty = "%lf" % float(result.data['RadiativeTransitions'][trans].FrequencyAccuracy)
                    except TypeError:
                        print(" -- Error uncertainty not available")
                        species_with_error.append(id)
                        continue

                    # Get statistical weight if present
                    try:
                        weight = int(upper_state.TotalStatisticalWeight)
                    except:
                        print(" -- Error statistical weight not available")
                        species_with_error.append(id)
                        continue

                    # Get nuclear spin isomer (ortho/para) if present
                    try:
                        nsiName = upper_state.NuclearSpinIsomerName
                    except AttributeError:
                        nsiName = None

                    # if nuclear spin isomer is defined then two entries have to be generated
                    if nsiName is not None and nsiName != '':
                        nsinames = [nsiName, None]
                        nsiStateOrigin = result.data['States']["%s" % upper_state.NuclearSpinIsomerLowestEnergy]
                        nsiEnergyOffset = float(nsiStateOrigin.StateEnergyValue)
                    else:
                        nsinames = [None]

                    for nsiName in nsinames:
                        # create name
                        if is_atom == True:
                            t_name = atomname
                        else:
                            t_affix = ";".join([affix for affix in [t_hfs, nsiName] if affix is not None and affix!=''])
                            t_name = "%s;%s;%s" % (formula, t_state, t_affix)
                        t_name = t_name.strip()
                        # remove all blanks in the name
                        t_name = t_name.replace(' ','')
                        # check if name is in the list of forbidden names and add counter if so
                        i = 1
                        while t_name in names_black_list:
                            t_name = "%s#%d" % (t_name.split('#')[0], i)
                            i += 1
                        # update list of distinct species names.
                        if id in species_names:
                            if not t_name in species_names[id]:
                                species_names[id].append(t_name)
                                num_transitions[t_name] = 0
                        else:
                            species_names[id] = [t_name]
                            num_transitions[t_name] = 0

                        if nsiName is not None:
                            lowerStateEnergy = float(lower_state.StateEnergyValue) - nsiEnergyOffset
                        else:
                            lowerStateEnergy = float(lower_state.StateEnergyValue)
                            
                        
                        # Insert transition into database
                        try:
                            cursor.execute("""INSERT INTO Transitions (
                            T_Name,
                            T_Frequency,
                            T_EinsteinA,
                            T_Uncertainty,
                            T_EnergyLower,
                            T_UpperStateDegeneracy,
                            T_HFS,
                            T_UpperStateQuantumNumbers,
                            T_LowerStateQuantumNumbers) VALUES
                            (?, ?,?,?,?, ?,?, ?,?)""",
                                           (t_name,
                                            "%lf" % frequency,
                                            "%g" % float(result.data['RadiativeTransitions'][trans].TransitionProbabilityA),
                                            uncertainty, "%lf" % lowerStateEnergy,
                                            weight,
                                            #upper_state.QuantumNumbers.case,
                                            t_hfs,
                                            str(upper_state.QuantumNumbers.qn_string),
                                            str(lower_state.QuantumNumbers.qn_string),
                                            ))
                            num_transitions[t_name] += 1
                        except Exception as e:
                            print("Transition has not been inserted:\n Error: %s" % e)
            print("\n")
            #------------------------------------------------------------------------------------------------------

            #------------------------------------------------------------------------------------------------------
            # delete transitions for all entries where an error occured during the insert
            for id in species_with_error:
                print(" -- Species {id} has not been inserted due to an error ".format(id=str(id)))
                try:
                    for name in species_names[id]:
                        cursor.execute("DELETE FROM Transitions WHERE T_Name=?", (str(name),))
                        print(" --    {name} ".format(name=str(name)))
                except:
                    pass

            #------------------------------------------------------------------------------------------------------
            # insert specie in Partitionfunctions (header) table
            if node:
                resourceID = node.identifier
                url = node.url
            else:
                resourceID = 'NULL'
                url = 'NULL'
                

            # Insert molecules
            for id in species_names:
                if id in species_with_error:
                    continue
                for name in species_names[id]:
                    # determine hyperfine-structure affix and nuclear spin isomer affix
                    try:
                        hfs = ''
                        nsi = ''
                        for affix in name.split("#")[0].split(';',2)[2].split(";"):
                            if affix.strip()[:3] == 'hyp':
                                hfs = affix.strip()
                            else:
                                # if affix does not identify hyperfine structure
                                # it identifies the nuclear spin isomer
                                nsi = affix.strip()
                    except:
                        hfs = ''

                    # Insert row in partitionfunctions
                    try:
                        if id in result.data['Atoms']:
                            if 'Comment' not in result.data['Atoms'][id].__dict__:
                                result.data['Atoms'][id].Comment = ""
                            cursor.execute("INSERT INTO Partitionfunctions (PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_Comment, PF_ResourceID, PF_URL, PF_Timestamp) VALUES (?,?,?,?,?,?,?)",
                                           ("%s" % name,
                                            id,
                                            "%s" % (result.data['Atoms'][id].VAMDCSpeciesID),
                                            "%s" % (result.data['Atoms'][id].Comment),
                                            resourceID,
                                            "%s%s%s" % (url, "sync?LANG=VSS2&amp;REQUEST=doQuery&amp;FORMAT=XSAMS&amp;QUERY=Select+*+where+SpeciesID%3D", id),
                                            datetime.now(), ))
                        else:
                            cursor.execute("INSERT INTO Partitionfunctions (PF_Name, PF_SpeciesID, PF_VamdcSpeciesID, PF_HFS, PF_NuclearSpinIsomer, PF_Comment, PF_ResourceID, PF_URL, PF_Timestamp) VALUES (?,?,?,?,?,?,?,?,?)",
                                           ("%s" % name,
                                            id,
                                            "%s" % (result.data['Molecules'][id].VAMDCSpeciesID),
                                            hfs,
                                            nsi, 
                                            "%s" % (result.data['Molecules'][id].Comment),
                                            resourceID,
                                            "%s%s%s" % (url, "sync?LANG=VSS2&amp;REQUEST=doQuery&amp;FORMAT=XSAMS&amp;QUERY=Select+*+where+SpeciesID%3D", id),
                                            datetime.now(), ))
                    except sqlite3.Error as e:
                        print("An error occurred:", e.args[0])
                    except Exception as e:
                        print("An error occurred:", e.args[0])
                        print(list(result.data['Molecules'].keys()))

                # Update Partitionfunctions
                if id in result.data['Atoms'].keys():
                    for temperature in Temperatures:
                        pf_values = specmodel.calculate_partitionfunction(result.data['States'], temperature = temperature)
                        try:
                            field = ("PF_%.3lf" % float(temperature)).replace('.', '_')
                            sql = "UPDATE Partitionfunctions SET %s=? WHERE PF_SpeciesID=? " % field
                            cursor.execute(sql, (pf_values[id], id))
                        except Exception as e:
                            print("SQL-Error: %s " % sql)
                            print(pf_value, id)
                            print("Error: %d: %s" % (e.args[0], e.args[1]))
                else:
                    try:
                        for pfs in result.data['Molecules'][id].PartitionFunction:
                            if 'NuclearSpinIsomer' not in pfs.__dict__:
                                nsi = ''
                            else:
                                nsi = pfs.NuclearSpinIsomer  
                            for temperature in pfs.values.keys():

                                try:
                                    field = ("PF_%.3lf" % float(temperature)).replace('.', '_')
                                    sql = "UPDATE Partitionfunctions SET %s=? WHERE PF_SpeciesID=? AND IFNULL(PF_NuclearSpinIsomer,'')=?" % field
                                    cursor.execute(sql, (pfs.values[temperature], id, nsi))
                                except Exception as e:
                                    print("SQL-Error: %s " % sql)
                                    print(pfs.values[temperature], id)
                                    print("Error: %d: %s" % (e.args[0], e.args[1]))
                    except:
                        pass
            #------------------------------------------------------------------------------------------------------

            for row in num_transitions:
                print("      for %s inserted %d transitions" % (row, num_transitions[row]))
            self.conn.commit()
            cursor.close()