def insert_partition_for_hitran():
    """Download every HITRAN partition file and load it into the database.

    Reads the global ids (column 0) and isotopologue names (column 3) from
    the local molecule-properties text file. For each isotopologue that is
    present in the ``particles`` table it:

    1. finds the ``line_source_id`` of its HITRAN entry in
       ``source_properties``,
    2. downloads ``https://hitran.org/data/Q/q<global_id>.txt``,
    3. rewrites the file with a space inserted after the first four
       characters of every line (some files have the two columns merged),
    4. passes the rewritten file to ``insert_partitions``.

    Raises:
        Exception: if an isotopologue found in ``particles`` has no line
            source whose name starts with ``'HITRAN'``.
    """
    start_time = time.time()
    global_ids, iso_names = np.loadtxt('/home/toma/Desktop/molecule_properties (copy).txt', dtype=str, skiprows=1, usecols=(0, 3), unpack=True)

    for global_id, iso_name in zip(global_ids, iso_names):
        if iso_name == '(16O)':
            # it has no partitions file on the HITRAN website
            continue

        # get particle_id
        get_particle_id = "SELECT particle_id FROM particles WHERE iso_name = '{}'".format(iso_name)
        data = fetch(get_particle_id)
        if data == ():
            print('empty moelcule encountered')
            # expected for ClONO2, SF6 and CF4, because only external links
            # to HITRAN 2012 data of those molecules are given
            continue
        particle_id = data[0][0]

        # get the HITRAN line source id for that isotopologue; when several
        # sources start with 'HITRAN' the last one returned wins
        get_line_sources = "SELECT line_source, line_source_id FROM source_properties WHERE particle_id = {}".format(particle_id)
        sources = fetch(get_line_sources)
        hitran_id = -1
        for one_source in sources:
            if one_source[0].startswith('HITRAN'):
                hitran_id = one_source[1]
        if hitran_id == -1:
            raise Exception('Oh Damn this isotopologue is in HITRAN but has no HITRAN lines in the database~')

        print(global_id, iso_name, particle_id, hitran_id)

        # download partition file
        url = 'https://hitran.org/data/Q/q{}.txt'.format(global_id)
        filename = iso_name + '_hitran_partiitons'
        download_file(url, filename)
        # NOTE(review): this second download is overwritten just below, so it
        # looks redundant -- kept because download_file is not visible here.
        download_file(url, filename + '_modified')

        # Some of the files have the same problem as the HITEMP ones (two
        # columns merged), so force a separator after the 4-character
        # temperature field. Both handles are context-managed so neither
        # leaks if reading or writing fails part-way through.
        source_path = '/home/toma/Desktop/linelists-database/' + filename
        modified_path = '/home/toma/Desktop/linelists-database/' + filename + '_modified'
        with open(source_path) as raw_file, open(modified_path, 'w') as modified_file:
            for line in raw_file:
                T = line[:4]
                par = line[4:]
                modified_file.write(T + ' ' + par + '\n')

        # insert the partition file
        insert_partitions(modified_path, hitran_id, particle_id)
        print('Finished inserting hitran partitions for ' + iso_name)

    print("Finished in %s seconds" % (time.time() - start_time))
def get_particle(iso_name):
    """Look up the single ``particles`` row that matches ``iso_name``.

    Args:
        iso_name: isotopologue name as stored in ``particles.iso_name``.

    Returns:
        The first (and only) row returned by the query, laid out as
        ``(particle_id, iso_abundance, iso_mass)``.

    Raises:
        Exception: if the query does not return exactly one row.
    """
    rows = fetch(
        "SELECT particle_id, iso_abundance, iso_mass FROM particles WHERE iso_name = '{}'".format(iso_name))
    if len(rows) == 1:
        # rows[0] = (particle_id, iso_abundance, iso_mass)
        return rows[0]
    raise Exception('should have exactly one row for a specific particle')
def get_partition(T, line_source_id, particle_id):
    """Return the stored partition value for one temperature and source.

    Args:
        T: temperature to match against ``partitions.temperature``; the
            original note says it has to be a float, e.g. ``19.0``.
        line_source_id: id of the line source the partition belongs to.
        particle_id: id of the isotopologue.

    Returns:
        The ``partition`` column of the single matching row.

    Raises:
        Exception: if the query does not return exactly one row.
    """
    print(T, line_source_id, particle_id)

    # query for the partition function at temperature T
    sql_template = "SELECT `partition` FROM partitions WHERE temperature = {} AND line_source_id = {} AND particle_id = {}"
    rows = fetch(sql_template.format(T, line_source_id, particle_id))

    if len(rows) == 1:
        return rows[0][0]
    raise Exception(
        'should have exactly one partition value given a specific T and line source'
    )
def correct_default_line_source_id(total_particle_id=242):
    """Reset ``particles.default_line_source_id`` for particle ids 1..N.

    For every particle id the line sources listed in ``source_properties``
    are inspected and the default is chosen with the priority
    EXOMOL > HITEMP > HITRAN (matched on the prefix of ``line_source``).
    When several sources share a prefix, the last one returned by the query
    is used.

    Args:
        total_particle_id: highest particle id to process; ids are assumed
            to run contiguously from 1. Defaults to 242, the value that was
            previously hard-coded.

    Raises:
        Exception: if a particle has no EXOMOL, HITEMP or HITRAN source.
    """
    start_time = time.time()
    # listed from most to least preferred
    preference = ('EXOMOL', 'HITEMP', 'HITRAN')

    for particle_id in range(1, total_particle_id + 1):
        print(particle_id)

        # get all the sources for that isotopologue
        get_line_sources = "SELECT line_source, line_source_id FROM source_properties WHERE particle_id = {};".format(
            particle_id)
        sources = fetch(get_line_sources)
        print(sources)

        # remember the (last) line_source_id seen for each database family
        ids_by_family = {}
        for one_source in sources:
            source_name = one_source[0]
            for family in preference:
                if source_name.startswith(family):
                    ids_by_family[family] = one_source[1]
                    break

        if not ids_by_family:
            raise Exception(
                'Oh Damn this isotopologue has none of the versions HITRAN, HITEMP, or EXOMOL...umm problematic~'
            )

        default_line_source_id = next(
            ids_by_family[family] for family in preference if family in ids_by_family)
        print(default_line_source_id)

        update_default_line_source_id = 'UPDATE particles SET default_line_source_id = {} WHERE particle_id = {};'.format(
            default_line_source_id, particle_id)
        sql_order(update_default_line_source_id)
        print('Finished correcting particle ' + str(particle_id))

    print("Finished in %s seconds" % (time.time() - start_time))
def insert_partitions(partitions_filepath, line_source_id, particle_id):
    """Bulk-insert a two-column partition file unless it is already stored.

    Columns 0 and 1 of the file are read as temperature and partition value.
    If the ``partitions`` table already holds any row for this
    ``line_source_id`` / ``particle_id`` pair, nothing is inserted and a
    message is printed instead.

    Args:
        partitions_filepath: path of the text file to load.
        line_source_id: id of the line source the partitions belong to.
        particle_id: id of the isotopologue.
    """
    temperatures, q_values = np.loadtxt(partitions_filepath, usecols=(0, 1), unpack=True)

    insert_sql = "INSERT INTO partitions (temperature, `partition`, line_source_id, particle_id, \
    partition_id) VALUES(%s, %s, {}, {}, null)".format(line_source_id, particle_id)

    existing = fetch("SELECT `partition` FROM partitions where line_source_id = {} and particle_id = {}".format(line_source_id, particle_id))
    if existing != ():
        # partitions for this isotopologue and this source are already in the database
        print('Partitions for this isotopologue and this exomol version already exist in database')
        return

    # nothing inserted yet: pair every temperature with its partition value
    rows = list(zip(temperatures, q_values))
    print("Bulk inserting partition data...")
    print("Executed {} lines of partition data".format(len(rows)))
    sql_bulk_order(insert_sql, rows)
def insert_hitran(filename, version_name, particle_id, reference_link):
    """Load one comma-separated HITRAN line list into ``transitions``.

    The line source is registered in ``source_properties`` (``INSERT
    IGNORE``). Every input line is then rewritten as a space-separated
    record in ``/home/toma/Desktop/hitran.txt`` with ``#`` fields replaced
    by the MySQL NULL marker, and that staging file is loaded with
    ``LOAD DATA LOCAL INFILE``.

    Field order of each input line:
    (nu, a, gamma_air, n_air, delta_air, elower, g_upper, gamma_H2, n_H2,
    delta_H2, gamma_He, n_He, delta_He).

    Any exception is caught: the transaction is rolled back and the error
    is printed rather than re-raised.

    Args:
        filename: path of the comma-separated HITRAN file.
        version_name: value stored in ``source_properties.line_source``.
        particle_id: id of the isotopologue the lines belong to.
        reference_link: value stored in ``source_properties.reference_link``.
    """
    # connect to the database
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration.
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    cursor = db.cursor()

    # disable autocommit and integrity checks to improve bulk-load performance
    sql_order('SET autocommit = 0')
    sql_order('SET unique_checks = 0')
    sql_order('SET foreign_key_checks = 0')
    sql_order('SET sql_log_bin = 0')

    try:
        # insert the line_source into source_properties and get line_source_id
        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
        bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'YES', 'YES', 'YES', '%s', \
        '%s', null);" % (version_name, reference_link, particle_id)
        sql_order(insert_version_query)

        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
        particle_id = {}".format(version_name, particle_id)
        data = fetch(get_line_source_id_query)
        if len(data) != 1:
            raise Exception(
                'should have exactly one line_source_id corresponding to one line_source'
            )
        line_source_id = data[0][0]

        counter = 0
        # Both files are context-managed so they are closed even when one of
        # the validation checks below raises part-way through the input.
        with open(filename) as infile, open('/home/toma/Desktop/hitran.txt', 'w') as staging_file:
            for line in infile:
                # '#' marks a missing value; \N is what LOAD DATA reads as NULL
                fields = ['\\N' if item == '#' else item
                          for item in line.strip().split(',')]

                # make sure at least one gamma and one n value is not null
                if fields[2] == '\\N' and fields[7] == '\\N' and fields[10] == '\\N':
                    raise Exception('should have at least one gamma value')
                if fields[3] == '\\N' and fields[8] == '\\N' and fields[11] == '\\N':
                    raise Exception('should have at least one n value')

                # every field is followed by one space, including the last
                staging_file.write(' '.join(fields) + ' \n')
                counter += 1

        print("Bulk inserting hitran data...")
        cursor.execute(
            "LOAD DATA LOCAL INFILE '/home/toma/Desktop/hitran.txt' INTO TABLE transitions FIELDS TERMINATED BY ' ' LINES TERMINATED BY '\n' \
            (@col1, @col2, @col3, @col4, @col5, @col6, @col7, @col8, @col9, @col10, @col11, @col12, @col13) SET nu=@col1, A=@col2, gamma_air=@col3, \
            n_air=@col4, delta_air=@col5, elower=@col6, g_upper=@col7, gamma_H2=@col8, n_H2=@col9, delta_H2=@col10, gamma_He=@col11, n_He=@col12, \
            delta_He=@col13, line_source_id={}, particle_id={};".format(
                line_source_id, particle_id))

        # commit changes
        db.commit()

        # turn the checks back on
        # NOTE(review): this only happens on success, as in the original.
        sql_order('SET unique_checks = 1')
        sql_order('SET foreign_key_checks = 1')
        sql_order('SET sql_log_bin = 1')

        print('Executed {} lines of hitran data'.format(counter))
    except Exception as e:
        # best effort: undo the partial load and report instead of raising
        db.rollback()
        print('insert hitran data failed', e)
    finally:
        # close up cursor and connection
        cursor.close()
        db.close()
def _h2_he_broadened_gamma(p, t_ratio, gamma_air, n_air, gamma_H2, n_H2, gamma_He, n_He):
    """Return gamma(p, T) per line, preferring H2/He broadening over air.

    ``t_ratio`` is ``T_ref / T``. NaN marks a parameter that is NULL in the
    database. Per line:

    * H2 and He both complete (gamma and n present):
      ``p * (t_ratio**n_H2 * gamma_H2 * 0.85 + t_ratio**n_He * gamma_He * 0.15)``
    * only He complete and both H2 values missing: He term with weight 1
    * only H2 complete and both He values missing: H2 term with weight 1
    * anything else, or a result of exactly 0: ``p * t_ratio**n_air * gamma_air``
    """
    ok_gamma_H2 = ~np.isnan(gamma_H2)
    ok_gamma_He = ~np.isnan(gamma_He)
    ok_n_H2 = ~np.isnan(n_H2)
    ok_n_He = ~np.isnan(n_He)

    air_gamma = p * t_ratio**n_air * gamma_air
    gamma_p_T = air_gamma.copy()

    both = np.all([ok_gamma_H2, ok_n_H2, ok_gamma_He, ok_n_He], axis=0)
    # p scales the whole weighted sum; previously it multiplied only the
    # H2 term because of operator precedence
    gamma_p_T[both] = p * (t_ratio**n_H2[both] * gamma_H2[both] * 0.85
                           + t_ratio**n_He[both] * gamma_He[both] * 0.15)

    he_only = np.all(
        [ok_gamma_He, ok_n_He, ~np.logical_or(ok_gamma_H2, ok_n_H2)], axis=0)
    gamma_p_T[he_only] = p * t_ratio**n_He[he_only] * gamma_He[he_only]

    h2_only = np.all(
        [ok_gamma_H2, ok_n_H2, ~np.logical_or(ok_gamma_He, ok_n_He)], axis=0)
    gamma_p_T[h2_only] = p * t_ratio**n_H2[h2_only] * gamma_H2[h2_only]

    # a width of exactly zero is treated as "no usable value": use air
    is_zero = gamma_p_T == 0
    gamma_p_T[is_zero] = air_gamma[is_zero]
    return gamma_p_T


def _pressure_shifted_nu(p, nu, delta_air, delta_H2, delta_He):
    """Return the pressure-shifted line positions ``nu + p * delta_net``.

    ``delta_net`` is 0.85 * delta_H2 + 0.15 * delta_He when both exist, the
    single existing one of the two otherwise, delta_air when neither exists,
    and zero when no shift is available at all. NaN marks a missing value.
    """
    ok_H2 = ~np.isnan(delta_H2)
    ok_He = ~np.isnan(delta_He)
    ok_air = ~np.isnan(delta_air)

    shifted = np.zeros(len(nu))

    both = np.logical_and(ok_H2, ok_He)
    shifted[both] = nu[both] + p * (delta_H2[both] * 0.85 + delta_He[both] * 0.15)

    he_only = np.logical_and(~ok_H2, ok_He)
    shifted[he_only] = nu[he_only] + p * delta_He[he_only]

    h2_only = np.logical_and(ok_H2, ~ok_He)
    shifted[h2_only] = nu[h2_only] + p * delta_H2[h2_only]

    air_only = np.all([ok_air, ~ok_H2, ~ok_He], axis=0)
    shifted[air_only] = nu[air_only] + p * delta_air[air_only]

    no_delta = np.all([~ok_air, ~ok_H2, ~ok_He], axis=0)
    shifted[no_delta] = nu[no_delta]
    return shifted


def new_compute_all(v, T, p, iso_name, line_source='default'):
    """Compute the absorption cross section on the wavenumber grid ``v``.

    Lines are streamed from ``transitions`` ordered by ``nu`` in chunks of
    5e6 rows. For every grid point only the lines of the current chunk with
    ``v - 25 < nu < v + 25`` are passed to ``compute_one_wavenum``, and the
    contributions of all chunks are summed.

    Args:
        v: NumPy array of wavenumbers at which to evaluate.
        T: temperature, used for the partition lookup and the broadening.
        p: pressure, multiplies the broadening and shift coefficients.
        iso_name: isotopologue name as stored in ``particles.iso_name``.
        line_source: ``source_properties.line_source`` to use, or
            ``'default'`` for the particle's ``default_line_source_id``.

    Returns:
        NumPy array with one cross-section value per entry of ``v``; all
        zeros when the query returns no lines.

    Raises:
        Exception: if ``line_source`` does not match exactly one source, or
            a HITEMP source is requested for an isotopologue without a
            HITRAN source (HITRAN partitions are used for HITEMP).
    """
    print(len(v))
    particle_id, iso_abundance, iso_mass = get_particle(iso_name)
    print(particle_id, iso_abundance, iso_mass)

    # get line source id
    if line_source == 'default':
        line_source_id = fetch(
            "SELECT default_line_source_id FROM particles WHERE particle_id = {}"
            .format(particle_id))[0][0]
    else:
        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' and \
        particle_id = {}".format(line_source, particle_id)
        data = fetch(get_line_source_id_query)
        if len(data) != 1:
            raise Exception(
                'should have exactly one line_source_id corresponding to one line_source and isotopologue'
            )
        line_source_id = data[0][0]
    print(line_source_id)

    # if computing using hitemp data, use hitran partitions
    # NOTE(review): with line_source='default' this branch is never taken,
    # even if the default source is a HITEMP one -- confirm that is intended.
    if 'HITEMP' in line_source:
        get_hitran_source_id_query = "SELECT line_source, line_source_id FROM source_properties WHERE particle_id = {}".format(
            particle_id)
        sources = fetch(get_hitran_source_id_query)
        hitran_id = -1
        for source in sources:
            if source[0].startswith('HITRAN'):
                hitran_id = source[1]
        if hitran_id == -1:
            raise Exception(
                'This isotopologue has hitemp but no hitran linelist which is weird'
            )
        Q = get_partition(T, hitran_id, particle_id)
    else:
        # for other sources, use the line source id itself
        Q = get_partition(T, line_source_id, particle_id)
    print(Q)

    fetch_time = time.time()
    # connect to the database
    # NOTE(review): credentials are hard-coded in source.
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    cursor = db.cursor()

    try:
        # all lines of the isotopologue for this source, sorted so that
        # np.searchsorted can be used on each chunk
        query = "SELECT nu, A, gamma_air, n_air, delta_air, elower, g_upper, gamma_H2, \
        n_H2, delta_H2, gamma_He, n_He, delta_He FROM transitions WHERE particle_id = {} AND \
        line_source_id = '{}' ORDER BY nu".format(particle_id, line_source_id)
        cursor.execute(query)
        print('Finished querying and fetching line list in %s seconds' %
              (time.time() - fetch_time))

        # fetching everything at once ran out of ~10GB of memory, so stream
        num_rows = int(5e6)
        counter = 0
        absorption_cross_section = np.zeros(len(v))

        while True:
            counter += 1
            print(counter)

            lines_table = cursor.fetchmany(size=num_rows)
            if len(lines_table) == 0:
                # no rows left (or none at all): an empty chunk cannot be
                # column-indexed, and it contributes nothing anyway
                break
            # NULL columns arrive as None and become NaN here
            lines_array = np.asarray(lines_table, dtype=np.float32)
            print(lines_array.shape)

            # column layout of each fetched row:
            # (nu, a, gamma_air, n_air, delta_air, elower, g_upper,
            #  gamma_H2, n_H2, delta_H2, gamma_He, n_He, delta_He)
            v_ij = lines_array[:, 0]
            print(v_ij)
            a = lines_array[:, 1]
            gamma_air = lines_array[:, 2]
            n_air = lines_array[:, 3]
            delta_air = lines_array[:, 4]
            elower = lines_array[:, 5]
            g_upper = lines_array[:, 6]
            gamma_H2 = lines_array[:, 7]
            n_H2 = lines_array[:, 8]
            delta_H2 = lines_array[:, 9]
            gamma_He = lines_array[:, 10]
            n_He = lines_array[:, 11]
            delta_He = lines_array[:, 12]

            gamma_p_T = _h2_he_broadened_gamma(
                p, T_ref / T, gamma_air, n_air, gamma_H2, n_H2, gamma_He, n_He)
            v_ij_star = _pressure_shifted_nu(p, v_ij, delta_air, delta_H2, delta_He)

            # lines_array[lower - 1] <= v - 25 < lines_array[lower]
            lower_indexes = np.searchsorted(v_ij, v - 25, side='right')
            # lines_array[upper - 1] < v + 25 <= lines_array[upper]
            upper_indexes = np.searchsorted(v_ij, v + 25)
            print(lower_indexes, upper_indexes)

            for i in range(len(v)):
                if i % 100000 == 0:
                    print(i)
                window = slice(lower_indexes[i], upper_indexes[i])
                # accumulate: each chunk adds its lines' contribution
                # (plain assignment discarded all earlier chunks)
                absorption_cross_section[i] += compute_one_wavenum(
                    v[i], T, p, iso_abundance, iso_mass, Q,
                    v_ij_star[window], a[window], elower[window],
                    g_upper[window], gamma_p_T[window])

            if len(lines_array) < num_rows:
                # a short chunk means the result set is exhausted
                break
    finally:
        # close up cursor and connection
        cursor.close()
        db.close()

    print(counter)
    return absorption_cross_section
def compute_all(v, T, p, iso_name, line_source='default'):
    """Compute the absorption cross section on ``v`` one line at a time.

    Each line of the isotopologue is fetched individually and its
    contribution, computed by ``compute_one_absorption``, is added to every
    grid point within 25 of the line position (inclusive on both sides).

    Args:
        v: NumPy array of wavenumbers at which to evaluate.
        T: temperature, used for the partition lookup and passed through.
        p: pressure, passed through to ``compute_one_absorption``.
        iso_name: isotopologue name as stored in ``particles.iso_name``.
        line_source: ``source_properties.line_source`` to use, or
            ``'default'`` for the particle's ``default_line_source_id``.

    Returns:
        NumPy array with one cross-section value per entry of ``v``.

    Raises:
        Exception: if ``line_source`` does not match exactly one source, or
            a HITEMP source is requested for an isotopologue without a
            HITRAN source (HITRAN partitions are used for HITEMP).
    """
    particle_id, iso_abundance, iso_mass = get_particle(iso_name)

    # get line source id
    if line_source == 'default':
        line_source_id = fetch(
            "SELECT default_line_source_id FROM particles WHERE particle_id = {}"
            .format(particle_id))[0][0]
    else:
        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' and \
        particle_id = {}".format(line_source, particle_id)
        data = fetch(get_line_source_id_query)
        if len(data) != 1:
            raise Exception(
                'should have exactly one line_source_id corresponding to one line_source and isotopologue'
            )
        line_source_id = data[0][0]
    print(line_source_id)

    # if computing using hitemp data, use hitran partitions
    if 'HITEMP' in line_source:
        get_hitran_source_id_query = "SELECT line_source, line_source_id FROM source_properties WHERE particle_id = {}".format(
            particle_id)
        sources = fetch(get_hitran_source_id_query)
        hitran_id = -1
        for source in sources:
            if source[0].startswith('HITRAN'):
                hitran_id = source[1]
        if hitran_id == -1:
            raise Exception(
                'This isotopologue has hitemp but no hitran linelist which is weird'
            )
        Q = get_partition(T, hitran_id, particle_id)
    else:
        # for other sources, use the line source id itself
        Q = get_partition(T, line_source_id, particle_id)
    print(Q)

    # connect to the database
    # NOTE(review): credentials are hard-coded in source.
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    cursor = db.cursor()

    # try/finally so the cursor and connection are released even when a
    # fetch or the per-line computation raises
    try:
        query = "SELECT nu, A, gamma_air, n_air, delta_air, elower, g_upper, gamma_H2, \
        n_H2, delta_H2, gamma_He, n_He, delta_He FROM transitions WHERE particle_id = {} AND \
        line_source_id = '{}'".format(particle_id, line_source_id)
        print(query)
        cursor.execute(query)

        # rowcount is read-only: the number of rows produced by execute()
        rows = cursor.rowcount
        print(rows, 'lines')

        # The table can be gigantic, so rather than fetchall() each line is
        # fetched as a tuple and its absorption is summed into the result.
        absorption_cross_section = np.zeros(len(v))
        for _ in range(rows):
            line = cursor.fetchone()
            cond = np.logical_and(v >= line[0] - 25, v <= line[0] + 25)
            if cond.any():
                absorption_cross_section[cond] += compute_one_absorption(
                    line, v[cond], T, p, Q, iso_abundance, iso_mass)
    finally:
        # close up cursor and connection
        cursor.close()
        db.close()

    return absorption_cross_section
def import_exomol_data(mol_name, iso_name, version_name, trans_fp, states_fp, partitions_fp,
                       broad_H2_fp, broad_He_fp, default_gamma, default_n, trans_file_num, reference_link):
    """Import one ExoMol line list (partitions and transitions).

    Steps, in order:

    1. make sure the isotopologue exists in ``particles`` (a placeholder
       row is inserted when it does not),
    2. register ``version_name`` in ``source_properties`` if it is new,
    3. insert the partition file through ``insert_partitions``,
    4. load the states file (columns 1-3, plus a version-specific extra
       column when any broadening-dictionary key contains ``'_'``),
    5. feed every transition file ``trans_fp + str(n)`` for
       ``n = 1..trans_file_num`` to ``insert_exomol`` in chunks of 1e7
       lines, committing once per file.

    Args:
        mol_name: molecule name; ``'H2O'`` selects version-specific columns.
        iso_name: isotopologue name as stored in ``particles.iso_name``.
        version_name: value stored in ``source_properties.line_source``.
        trans_fp: transition file path prefix (the file number is appended).
        states_fp: path of the states file.
        partitions_fp: path of the partition file.
        broad_H2_fp: path of the H2 broadening file, or None.
        broad_He_fp: path of the He broadening file, or None.
        default_gamma: default gamma; None is replaced by the NULL marker.
        default_n: default n; None is replaced by the NULL marker.
        trans_file_num: number of transition files.
        reference_link: value stored in ``source_properties.reference_link``.

    Raises:
        Exception: if exactly one of the two broadening files is given, if
            the particle or line source lookup is not unique, or if an H2O
            version other than POKAZATEL, BT2, HotWat78 or VTT needs the
            extra states column.
    """
    # \N is the marker LOAD DATA reads as NULL
    if default_gamma is None:
        default_gamma = '\\N'
        print('Oh Damn this isotologue has no gamma at all in exomol data')
    if default_n is None:
        default_n = '\\N'
        print('Oh Damn this isotologue has no N at all in exomol data')

    one_iso_time = time.time()

    # connect to the database
    # NOTE(review): credentials are hard-coded in source.
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    cursor = db.cursor()

    # try/finally so the cursor and connection are released on any failure
    try:
        # get particle_id
        get_particle_id = "SELECT particle_id FROM particles WHERE iso_name = '{}'".format(iso_name)
        check = fetch(get_particle_id)
        if check == ():
            # the particle is not yet in the particle table: insert a
            # placeholder row (0, 0 and a temporary 1) to be updated later
            particle_property_query = "INSERT INTO particles VALUES('%s', '%s', '%s', '%s', '%s', null);" % (
                mol_name, iso_name, 0, 0, 1)
            sql_order(particle_property_query)

        data = fetch(get_particle_id)
        if len(data) != 1:
            raise Exception('iso_name should correspond to exactly one isotopologue in the database')
        particle_id = data[0][0]

        # load the H2/He broadening parameters and decide the bool_H2/bool_He flags
        if broad_H2_fp is None and broad_He_fp is None:
            # no .broad files in exomol
            no_broadening_param = True
            H2_dict = None
            He_dict = None
            h2_he_flag = 'NO'
        elif broad_H2_fp is not None and broad_He_fp is not None:
            # both H2 and He .broad files in exomol
            no_broadening_param = False
            H2_dict = temp_broad_param_dict(broad_H2_fp)
            He_dict = temp_broad_param_dict(broad_He_fp)
            h2_he_flag = 'YES'
        else:
            raise Exception('Should have either neither or both of the H2 and He broad param files')

        insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
        bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'NO', '%s', '%s', '%s', \
        '%s', null);" % (version_name, h2_he_flag, h2_he_flag, reference_link, particle_id)

        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
        particle_id = {}".format(version_name, particle_id)
        output = fetch(get_line_source_id_query)
        if output != ():
            # source was inserted already
            line_source_id = output[0][0]
        else:
            # insert the source and read back its id
            sql_order(insert_version_query)
            data = fetch(get_line_source_id_query)
            if len(data) != 1:
                raise Exception('should have exactly one line_source_id corresponding to one line_source')
            line_source_id = data[0][0]

        # insert partitions
        insert_partitions(partitions_fp, line_source_id, particle_id)
        db.commit()

        # load states
        states_time = time.time()
        print('Loading huge ass states file')
        # states are in id order starting at 1 for all files
        Es, gs, Js = np.loadtxt(states_fp, usecols=(1, 2, 3), unpack=True)

        Ks = None
        if no_broadening_param is False:
            # a '_' in any broadening key means an extra states column is needed
            has_K = (any('_' in key for key in H2_dict)
                     or any('_' in key for key in He_dict))
            if has_K:
                if mol_name == 'H2O':
                    # the column position depends on the H2O line-list version
                    h2o_k_columns = {
                        'EXOMOL_POKAZATEL': 4,
                        'EXOMOL_BT2': 13,
                        'EXOMOL_HotWat78': 4,
                        'EXOMOL_VTT': 11,
                    }
                    if version_name not in h2o_k_columns:
                        raise Exception('Should not have versions other than POKAZATEL, BT2, HotWat78, and VTT for H2O in EXOMOL')
                    k_column = h2o_k_columns[version_name]
                else:
                    # cases like PH3 and CH4
                    k_column = 6
                # builtin str: the np.str alias was removed in NumPy 1.24
                Ks = np.loadtxt(states_fp, usecols=k_column, unpack=True, dtype=str)
        print('Finished loading states file in %s seconds' % (time.time() - states_time))

        # insert transition files
        counter = 0
        for file_num in range(1, trans_file_num + 1):
            curr_file = trans_fp + str(file_num)

            # get the number of lines in the trans file (handle closed afterwards)
            with open(curr_file) as counted_file:
                length_trans = sum(1 for _ in counted_file)
            print(length_trans, 'lines : Opened the transition file')

            with open(curr_file) as trans:
                # The file is consumed in chunks of max_size lines.
                # start_line stays 0 because the same open handle is passed
                # each time -- presumably insert_exomol continues from the
                # last line read; verify against insert_exomol.
                start_line = 0
                max_size = 1e7
                repeat = 0
                while length_trans >= start_line + max_size:
                    counter += insert_exomol(
                        cursor, H2_dict, He_dict, Es, gs, Js, Ks, start_line,
                        int(start_line + max_size), '/home/toma/Desktop/exomol.txt',
                        trans, line_source_id, particle_id, default_gamma,
                        default_n, no_broadening_param)
                    length_trans -= max_size
                    repeat += 1

                # remainder: fewer than max_size lines are left
                counter += insert_exomol(
                    cursor, H2_dict, He_dict, Es, gs, Js, Ks, start_line,
                    int(length_trans), '/home/toma/Desktop/exomol.txt',
                    trans, line_source_id, particle_id, default_gamma,
                    default_n, no_broadening_param)

                # commit one file altogether at one time
                db.commit()

            print('Finished loading {} with {} lines of data'.format(
                curr_file, int(length_trans + repeat * max_size)))
    finally:
        cursor.close()
        db.close()

    print("Finished inserting", counter, "lines of exomol", version_name, "data for", iso_name, "in %s seconds" % (time.time() - one_iso_time))
def insert_hitemp(fp, isotop_name, line_source, ref_link):
    """Bulk-load a space-separated HITEMP file into ``transitions``.

    The line source is registered in ``source_properties`` when it is not
    there yet, then ``fp`` is loaded with ``LOAD DATA LOCAL INFILE``; its
    first seven columns map to
    (nu, A, gamma_air, n_air, delta_air, elower, g_upper).

    Any exception is caught: the transaction is rolled back and the error
    is printed rather than re-raised.

    Args:
        fp: path of the prepared HITEMP data file.
        isotop_name: isotopologue name as stored in ``particles.iso_name``.
        line_source: value stored in ``source_properties.line_source``.
        ref_link: value stored in ``source_properties.reference_link``.
    """
    insert_time = time.time()

    # connect to the database
    # NOTE(review): credentials are hard-coded in source.
    db = MySQLdb.connect(host='localhost', user='******', passwd='Happy810@', db='linelist')
    cursor = db.cursor()

    # disable autocommit and integrity checks to improve bulk-load performance
    sql_order('SET autocommit = 0')
    sql_order('SET unique_checks = 0')
    sql_order('SET foreign_key_checks = 0')
    sql_order('SET sql_log_bin = 0')

    try:
        # get particle id
        particle_id = get_particle(isotop_name)[0]

        get_line_source_id_query = "SELECT line_source_id FROM source_properties WHERE line_source = '{}' AND \
        particle_id = {}".format(line_source, particle_id)
        output = fetch(get_line_source_id_query)
        if output != ():
            # source inserted already
            line_source_id = output[0][0]
        else:
            # insert the source and read back its id
            insert_version_query = "INSERT IGNORE INTO source_properties(line_source, max_temperature, max_nu, num_lines, bool_air, \
            bool_H2, bool_He, reference_link, particle_id, line_source_id) VALUES('%s', null, null, null, 'YES', 'NO', 'NO', '%s', \
            '%s', null);" % (line_source, ref_link, particle_id)
            sql_order(insert_version_query)

            data = fetch(get_line_source_id_query)
            if len(data) != 1:
                raise Exception(
                    'should have exactly one line_source_id corresponding to one line_source'
                )
            line_source_id = data[0][0]

        # count the lines for the report below; the handle is closed right away
        with open(fp) as counted_file:
            file_length = sum(1 for _ in counted_file)

        print("Bulk inserting hitemp data...")
        cursor.execute("LOAD DATA LOCAL INFILE '{}' INTO TABLE transitions FIELDS TERMINATED BY ' ' LINES TERMINATED BY '\n' \
        (@col1, @col2, @col3, @col4, @col5, @col6, @col7) SET nu=@col1, A=@col2, gamma_air=@col3, n_air=@col4, \
        delta_air=@col5, elower=@col6, g_upper=@col7, line_source_id={}, particle_id={};".format(
            fp, line_source_id, particle_id))

        # commit changes
        db.commit()

        # turn the checks back on
        # NOTE(review): this only happens on success, as in the original.
        sql_order('SET unique_checks = 1')
        sql_order('SET foreign_key_checks = 1')
        sql_order('SET sql_log_bin = 1')

        print('Executed {} lines of hitemp data'.format(file_length))
    except Exception as e:
        # best effort: undo the partial load and report instead of raising
        db.rollback()
        print('insert hitemp data failed', e)
    finally:
        # close up cursor and connection
        cursor.close()
        db.close()

    print("Finished in %s seconds" % (time.time() - insert_time))