def import_hitran_and_populate_particles_table():
    start_time = time.time()
    '''
    #change file to loadable version
    with open('/home/toma/Desktop/molecule_properties.txt', 'r') as f: 
        outfile = open('/home/toma/Desktop/molecule_properties (copy).txt', 'w')
        for line in f: 
            data = line.split()
            data.pop(1)
            for i in data:
                outfile.write("%s " % i)
            outfile.write('\n')
        outfile.close()
    f.close()
    '''

    version_name = 'HITRAN_2016'
    reference_link = r'https://www.sciencedirect.com/science/article/pii/S0022407317301073?via%3Dihub'

    #everything is in string though to be noticed
    #length of lists are 124
    mol_ids, iso_ids, iso_names, iso_abundances, iso_masses, mol_names = \
    np.loadtxt('/home/toma/Desktop/molecule_properties (copy).txt', dtype='str', skiprows=1, usecols=(1, 2, 3, 4, 5, 6), unpack=True)

    for i in range(len(mol_ids)):

        particle_property_query = "INSERT INTO particles VALUES('%s', '%s', '%s', '%s', '%s', null);" % (mol_names[i], iso_names[i], \
                                                               iso_abundances[i], iso_masses[i], 1) #this one is temporary
        #insert each molecule's properties into particles table
        sql_order(particle_property_query)

        #then, fetch all the data from HITRAN using HAPI
        hapi.db_begin('data')
        #becasue cannot choose inifinity as upper limit, use a giant number instead
        #gpp is upper state degeneracy
        hapi.fetch(mol_names[i], int(mol_ids[i]), int(iso_ids[i]), 0, 1e9, Parameters=['nu', 'a', 'gamma_air', 'n_air', 'delta_air', \
                   'elower', 'gpp', 'gamma_H2', 'n_H2', 'delta_H2', 'gamma_He', 'n_He', 'delta_He'])

        #open the file and use insert_hitran.py to insert all parameters into transitions table
        filename = '/home/toma/Desktop/linelists-database/data/{}.data'.format(
            mol_names[i])

        insert_hitran.insert_hitran(filename, version_name, i + 1,
                                    reference_link)

        #delete the files since the files are named by HAPI using mol_name instead of iso_name
        #s.t. python wont get confused in the for loop
        header_filename = '/home/toma/Desktop/linelists-database/data/{}.header'.format(
            mol_names[i])
        os.remove(filename)
        os.remove(header_filename)

    print("Finished in %s seconds" % (time.time() - start_time))
def correct_default_line_source_id():

    start_time = time.time()

    total_particle_id = 242
    for i in range(total_particle_id):
        particle_id = i + 1
        print(particle_id)
        #get all the sources for that isotopologue
        default_line_source_id = -1
        get_line_sources = "SELECT line_source, line_source_id FROM source_properties WHERE particle_id = {};".format(
            particle_id)
        sources = fetch(get_line_sources)
        print(sources)
        has_exomol = False
        exomol_id = -1
        has_hitemp = False
        hitemp_id = -1
        has_hitran = False
        hitran_id = -1
        for one_source in sources:
            source_name = one_source[0]
            if source_name.startswith('EXOMOL'):
                has_exomol = True
                exomol_id = one_source[1]
            elif source_name.startswith('HITEMP'):
                has_hitemp = True
                hitemp_id = one_source[1]
            elif source_name.startswith('HITRAN'):
                has_hitran = True
                hitran_id = one_source[1]
        if not has_exomol and not has_hitemp and not has_hitran:
            raise Exception(
                'Oh Damn this isotopologue has none of the versions HITRAN, HITEMP, or EXOMOL...umm problematic~'
            )
        if has_exomol:
            default_line_source_id = exomol_id
        elif not has_exomol and has_hitemp:
            default_line_source_id = hitemp_id
        else:  #only HITRAN
            default_line_source_id = hitran_id
        print(default_line_source_id)
        update_default_line_source_id = 'UPDATE particles SET default_line_source_id = {} WHERE particle_id = {};'.format(
            default_line_source_id, particle_id)
        sql_order(update_default_line_source_id)
        print('Finished correcting particle ' + str(particle_id))

    print("Finished in %s seconds" % (time.time() - start_time))
        compressed))
    '''
    #can run out of memory
    file = request.urlopen(bz2_url)
    #decompressor = bz2.BZ2Decompressor()
    compressed_data = file.read()
    data = bz2.decompress(compressed_data)
    #file_lines = data.decode()
    #store file info in outfile
    outfile = open('{}'.format(outfile_name), 'wb')
    outfile.write(data)
    outfile.close()
    '''


##################
if __name__ == '__main__':

    #disable autocommit to improve performance
    sql_order('SET autocommit = 0')
    sql_order('SET unique_checks = 0')
    sql_order('SET foreign_key_checks = 0')
    sql_order('SET sql_log_bin = 0')

    populate_all_exomol()

    #turn them back on
    sql_order('SET unique_checks = 1')
    sql_order('SET foreign_key_checks = 1')
    sql_order('SET sql_log_bin = 1')
def insert_hitran(filename, version_name, particle_id, reference_link):
    #connect to the database
    db = MySQLdb.connect(host='localhost',
                         user='******',
                         passwd='Happy810@',
                         db='linelist')

    #create a cursor object
    cursor = db.cursor()

    #disable autocommit to improve performance
    sql_order('SET autocommit = 0')
    sql_order('SET unique_checks = 0')
    sql_order('SET foreign_key_checks = 0')
    sql_order('SET sql_log_bin = 0')

    #insert the data of all lines for CO into table lines
    # with open('CO(copy).out') as infile: #
    try:
        #insert the line_source into source_properties and get line_source_id
        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
        bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'YES', 'YES', 'YES', '%s', \
        '%s', null);" % (version_name, reference_link, particle_id)

        sql_order(insert_version_query)

        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
        particle_id = {}".format(version_name, particle_id)

        data = fetch(get_line_source_id_query)

        if len(data) != 1:
            raise Exception(
                'should have exactly one line_source_id corresponding to one line_source'
            )

        line_source_id = data[0][0]

        #file that the parameters are written into and import to mysql using LOAD DATA INFILE
        f = open('/home/toma/Desktop/hitran.txt', 'w')

        #open the file
        infile = open(filename)

        counter = 0

        #create a list of all the queries to bulk insert it
        #bulk_data = []
        #query = "INSERT INTO transitions VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 'HITRAN_2016', 1, null)"

        for line in infile:
            data = line.strip().split(',')
            for i in range(len(data)):
                if data[i] == '#':
                    data[i] = '\\N'
            #line table arrangement corresponding to tuple indexes:
            #(nu, a, gamma_air, n_air, delta_air, elower, g_upper, gamma_H2, n_H2, delta_H2, gamma_He, n_He, delta_He, line_source_id, particle_id, line_id)
            #( 0, 1,      2,       3,       4,        5,    6,    7,       8,      9,       10,      11,    12,        13,         14,         15  )

            #make sure at least one gamma and one n value is not null
            if data[2] == '\\N' and data[7] == '\\N' and data[10] == '\\N':
                raise Exception('should have at least one gamma value')
            if data[3] == '\\N' and data[8] == '\\N' and data[11] == '\\N':
                raise Exception('should have at least one n value')

            #write into infile the parameters for each line
            for item in data:
                f.write("%s " % item)
            f.write("\n")

            counter += 1

        f.close()
        print("Bulk inserting hitran data...")

        cursor.execute(
            "LOAD DATA LOCAL INFILE '/home/toma/Desktop/hitran.txt' INTO TABLE transitions FIELDS TERMINATED BY ' ' LINES TERMINATED BY '\n' \
                  (@col1, @col2, @col3, @col4, @col5, @col6, @col7, @col8, @col9, @col10, @col11, @col12, @col13) SET nu=@col1, A=@col2, gamma_air=@col3, \
                  n_air=@col4, delta_air=@col5, elower=@col6, g_upper=@col7, gamma_H2=@col8, n_H2=@col9, delta_H2=@col10, gamma_He=@col11, n_He=@col12, \
                  delta_He=@col13, line_source_id={}, particle_id={};".format(
                line_source_id, particle_id))

        #commit changes and close file
        db.commit()
        infile.close()

        #turn it back on
        sql_order('SET unique_checks = 1')
        sql_order('SET foreign_key_checks = 1')
        sql_order('SET sql_log_bin = 1')

        print('Executed {} lines of hitran data'.format(counter))

    except Exception as e:
        #if errors occur
        db.rollback()
        print('insert hitran data failed', e)

    finally:
        #close up cursor and connection
        cursor.close()
        db.close()
Ejemplo n.º 5
0
def import_exomol_data(mol_name, iso_name, version_name, trans_fp, states_fp, partitions_fp, \
                       broad_H2_fp, broad_He_fp, default_gamma, default_n, trans_file_num, reference_link):
    
    ###################
    
    if default_gamma is None:
        default_gamma = '\\N'
        print('Oh Damn this isotologue has no gamma at all in exomol data')
    if default_n is None: 
        default_n = '\\N'
        print('Oh Damn this isotologue has no N at all in exomol data')
        
    ###################
    
    one_iso_time = time.time()  
    #connect to the database
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist') 
    #create a cursor object
    cursor = db.cursor()
    #disable autocommit to improve performance
    #sql_order('SET autocommit = 0')
    #sql_order('SET unique_checks = 0')
    #sql_order('SET foreign_key_checks = 0')
    #sql_order('SET sql_log_bin = 0')
    
    ##################
    
    #get particle_id
    get_particle_id = "SELECT particle_id FROM particles WHERE iso_name = '{}'".format(iso_name)
    check = fetch(get_particle_id)        
    if check == (): #if the particle is not yet in the particle table, insert it
        #need to update the particle later...insert 0 for now
        particle_property_query = "INSERT INTO particles VALUES('%s', '%s', '%s', '%s', '%s', null);" % (mol_name, iso_name, \
                                                               0, 0, 1) #this 1 is temporary   
        sql_order(particle_property_query)
    #now get particle_id
    data = fetch(get_particle_id)
    if len(data) != 1:
        raise Exception('iso_name should correspond to exactly one isotopologue in the database')
    particle_id = data[0][0]
    
    #load H2/He params and in the mean while
    #insert the line_source into source_properties and get line_source_id
    if broad_H2_fp is None and broad_He_fp is None: #when no .broad files in exomol
        no_broadening_param = True
        
        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
        bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'NO', 'NO', 'NO', '%s', \
        '%s', null);" % (version_name, reference_link, particle_id)
        H2_dict = None
        He_dict = None
        
    elif broad_H2_fp is not None and broad_He_fp is not None: #when both H2 and He .broad files in exomol
        no_broadening_param = False
        H2_dict = temp_broad_param_dict(broad_H2_fp)
        He_dict = temp_broad_param_dict(broad_He_fp)
        
        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
        bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'NO', 'YES', 'YES', '%s', \
        '%s', null);" % (version_name, reference_link, particle_id)
        
    else: 
        raise Exception('Should have either neither or both of the H2 and He broad param files')
    
    get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
    particle_id = {}".format(version_name, particle_id)
    
    output = fetch(get_line_source_id_query)
    if output != (): #if source was inserted already            
        line_source_id = output[0][0]
        
    else: #insert the source and get the source id
        #insert the line_source into source_properties and get line_source_id
        sql_order(insert_version_query)
    
        data = fetch(get_line_source_id_query)

        if len(data) != 1:
            raise Exception('should have exactly one line_source_id corresponding to one line_source')
            
        line_source_id = data[0][0]
    
    #####################
    
    #insert partitions
    insert_partitions(partitions_fp, line_source_id, particle_id)
    db.commit()
    
    #load states
    states_time = time.time()
    #get parameters needed to insert exomol data into transitions
    print('Loading huge ass states file')
    #states in id order starts in 1
    
    
    #for all files this is true      
    Es, gs, Js= np.loadtxt(states_fp, usecols=(1, 2, 3), unpack=True)
        
    if no_broadening_param is False:
        has_K = False
        for key in H2_dict.keys(): 
            if '_' in key: 
                has_K = True
        for key in He_dict.keys(): 
            if '_' in key: 
                has_K = True
        
        #load or not load Ks
        if has_K is True: #when contain 'a1' or sth
            if mol_name == 'H2O': ##version specification for H2O
                if version_name == 'EXOMOL_POKAZATEL': 
                    Ks = np.loadtxt(states_fp, usecols=4, unpack=True, dtype=np.str)
                elif version_name == 'EXOMOL_BT2': 
                    Ks = np.loadtxt(states_fp, usecols=13, unpack=True, dtype=np.str)                    
                elif version_name == 'EXOMOL_HotWat78': 
                    Ks = np.loadtxt(states_fp, usecols=4, unpack=True, dtype=np.str)
                elif version_name == 'EXOMOL_VTT': 
                    Ks = np.loadtxt(states_fp, usecols=11, unpack=True, dtype=np.str)
                else: 
                    raise Exception('Should not have versions other than POKAZATEL, BT2, HotWat78, and VTT for H2O in EXOMOL')
            else: #cases like PH3 and CH4
                Ks = np.loadtxt(states_fp, usecols=6, unpack=True, dtype=np.str) 
        else: 
            Ks = None    
    else: #no broadening param
        Ks = None
    print('Finished loading states file in %s seconds' % (time.time() - states_time))                    
    
    ######################
    
    #insert transition files
    counter = 0 
    
    for file_num in range(1, trans_file_num + 1):
        '''
        standard error control haha
        if iso_name == '' and version_name == '':
            if file_num <= 0: 
                continue
        '''
        curr_file = trans_fp + str(file_num)  
        #get the number of lines in trans file
        length_trans = sum(1 for line in open(curr_file))
        print(length_trans, 'lines : Opened the transition file')
        
        with open(curr_file) as trans:
            #for spliiting file into smalller chunks...but mysql auto_increment seems to not be working properly
            start_line = 0
            max_size = 1e7            
            repeat = 0
            
            while length_trans >= start_line + max_size: 
                counter += insert_exomol(cursor, H2_dict, He_dict, Es, gs, Js, Ks, start_line, int(start_line + max_size), '/home/toma/Desktop/exomol.txt', \
                                         trans, line_source_id, particle_id, default_gamma, default_n, no_broadening_param)
                #islice starts from the next line after the last read line
                length_trans -= max_size
                #print(int(length_trans))
                repeat += 1
            
            #out of the while loop when difference between start_line and the max lines in trans file is less than max_size
            counter += insert_exomol(cursor, H2_dict, He_dict, Es, gs, Js, Ks, start_line, int(length_trans), '/home/toma/Desktop/exomol.txt', \
                                     trans, line_source_id, particle_id, default_gamma, default_n, no_broadening_param)
            
        #commit one file altogether at one time
        db.commit()
        trans.close()
        
        print('Finished loading {} with {} lines of data'.format(curr_file, int(length_trans + repeat * max_size)))        
        
        #set @id:=0; update mytable set id = (@id := @id + 1) order by id; for correcting auto_increment if needed
    
    #turn them back on
    #sql_order('SET unique_checks = 1')
    #sql_order('SET foreign_key_checks = 1')
    #sql_order('SET sql_log_bin = 1')
    
    cursor.close()
    db.close()
    
    print("Finished inserting", counter, "lines of exomol", version_name, "data for", iso_name, "in %s seconds" % (time.time() - one_iso_time))
Ejemplo n.º 6
0
def insert_hitemp(fp, isotop_name, line_source, ref_link):

    insert_time = time.time()

    #connect to the database
    db = MySQLdb.connect(host='localhost',
                         user='******',
                         passwd='Happy810@',
                         db='linelist')

    #create a cursor object
    cursor = db.cursor()

    #disable autocommit to improve performance
    sql_order('SET autocommit = 0')
    sql_order('SET unique_checks = 0')
    sql_order('SET foreign_key_checks = 0')
    sql_order('SET sql_log_bin = 0')

    try:
        #get particle id
        particle_id = get_particle(isotop_name)[0]

        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
        particle_id = {}".format(line_source, particle_id)

        output = fetch(get_line_source_id_query)
        if output != ():  #source inserted already
            line_source_id = output[0][0]

        else:  #insert the source and get the source id
            #insert the line_source into source_properties and get line_source_id
            insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
            bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'YES', 'NO', 'NO', '%s', \
            '%s', null);" % (line_source, ref_link, particle_id)

            sql_order(insert_version_query)

            get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
            particle_id = {}".format(line_source, particle_id)

            data = fetch(get_line_source_id_query)

            if len(data) != 1:
                raise Exception(
                    'should have exactly one line_source_id corresponding to one line_source'
                )

            line_source_id = data[0][0]

        file_length = sum(1 for line in open(fp))

        print("Bulk inserting hitemp data...")
        cursor.execute("LOAD DATA LOCAL INFILE '{}' INTO TABLE transitions FIELDS TERMINATED BY ' ' LINES TERMINATED BY '\n' \
                       (@col1, @col2, @col3, @col4, @col5, @col6, @col7) SET nu=@col1, A=@col2, gamma_air=@col3, n_air=@col4, \
                       delta_air=@col5, elower=@col6, g_upper=@col7, line_source_id={}, particle_id={};"                                                                                                        .format(fp, \
                       line_source_id, particle_id))

        #commit changes
        db.commit()

        #turn it back on
        sql_order('SET unique_checks = 1')
        sql_order('SET foreign_key_checks = 1')
        sql_order('SET sql_log_bin = 1')

        print('Executed {} lines of hitemp data'.format(file_length))

    except Exception as e:
        #if errors occur
        db.rollback()
        print('insert hitemp data failed', e)

    finally:
        #close up cursor and connection
        cursor.close()
        db.close()

    print("Finished in %s seconds" % (time.time() - insert_time))