isos = Iso.objects.filter(molecule__molecID=molec_id).all() print len(isos), 'isotopologues for molec', molec_id transitions = Trans.objects.filter(iso__in=isos).all() ntrans = len(transitions) last_pc = 0 for i, trans in enumerate(transitions): prms = trans.prm_set #for prm_name in ('Sw', 'A', 'gamma_air', 'gamma_self', 'n_air', # 'delta_air'): # exec('trans.%s = prms.get(name="%s")' % (prm_name, prm_name)) Ierr = trans.par_line[127:133] Iref = trans.par_line[133:145] this_trans = HITRANTransition() this_trans.par_line = trans.par_line this_trans.molec_id = molec_id this_trans.iso_id = trans.iso.isoID this_trans.nu = HITRANParam(val=prms.get(name="nu").val, ref=int(Iref[:2]), name='nu', ierr=int(Ierr[0])) this_trans.Sw = HITRANParam(val=prms.get(name="Sw").val, ref=int(Iref[2:4]), name='Sw', ierr=int(Ierr[1]), relative=True) this_trans.A = HITRANParam(val=prms.get(name="A").val, ref=int(Iref[2:4]),
isos = Iso.objects.filter(molecule__molecID=molec_id).all() print len(isos),'isotopologues for molec', molec_id transitions = Trans.objects.filter(iso__in=isos).all() ntrans = len(transitions) last_pc = 0 for i, trans in enumerate(transitions): prms = trans.prm_set #for prm_name in ('Sw', 'A', 'gamma_air', 'gamma_self', 'n_air', # 'delta_air'): # exec('trans.%s = prms.get(name="%s")' % (prm_name, prm_name)) Ierr = trans.par_line[127:133] Iref = trans.par_line[133:145] this_trans = HITRANTransition() this_trans.par_line = trans.par_line this_trans.molec_id = molec_id this_trans.iso_id = trans.iso.isoID this_trans.nu = HITRANParam(val=prms.get(name="nu").val, ref=int(Iref[:2]), name='nu', ierr=int(Ierr[0])) this_trans.Sw = HITRANParam(val=prms.get(name="Sw").val, ref=int(Iref[2:4]), name='Sw', ierr=int(Ierr[1]), relative=True) this_trans.A = HITRANParam(val=prms.get(name="A").val, ref=int(Iref[2:4]), name='Sw', ierr=int(Ierr[1]), relative=True) this_trans.gamma_air = HITRANParam(val=prms.get(name="gamma_air").val, ref=int(Iref[4:6]), name='gamma_air', ierr=int(Ierr[2]), relative=True) try: this_trans.gamma_self = HITRANParam(
def _lookup_state(state_id, first_stateID, states):
    """
    Return the State object for state_id.

    IDs below first_stateID are fetched from the database by primary key;
    IDs at or above it are read from the states list, in which index 0
    corresponds to first_stateID.

    """

    if state_id < first_stateID:
        # this state is already in the database: find it
        return State.objects.all().get(pk=state_id)
    # new state: get it from the list of states just uploaded
    return states[state_id - first_stateID]


def upload_data(args, molecule, isos, d_refs):
    """
    Upload the new transitions and states to the database. Only do this
    for real if args.dry_run = False.

    Arguments:
    args: the processed command line arguments with the names of files to
    use in uploading the data, the list of HITRAN molecule and isotopologue
    IDs to resolve the global_iso_id to etc...

    molecule: the Molecule object for the molecule whose transitions and
    states are to be uploaded (not referenced in the body of this function).

    isos: a list of Iso objects, ordered by their local isotopologue ID

    d_refs: a dictionary of RefsMap objects, keyed by HITRAN-style refID,
    e.g. 'O2-gamma_self-2'.

    The list of Case objects handed to upload_states is built here: its
    index is the case_id (ie cases_list[0] is None, cases_list[1] is the
    Case with caseID 1, etc...).

    If a parameter has a value but no source_id, an error is printed and
    the program exits with status 1.

    """

    if args.dry_run:
        vprint('[DRY RUN] Uploading to database...')
    else:
        vprint('Uploading to database...')

    # first, expire old lines
    expire_old_transitions(args)

    # get the all the molecular state description 'cases' in a list indexed
    # by caseID; caseIDs start at 1, so cases_list[0] = None
    cases_list = [None] + list(Case.objects.all())

    # find out the ID at which we can start adding states: this must be
    # done *before* upload_states is called
    try:
        first_stateID = State.objects.all().order_by('-id')[0].id + 1
    except IndexError:
        # no states in the database yet, so we start at 1
        first_stateID = 1
    vprint('new states will be added with ids starting at %d' % first_stateID)

    # upload the new states
    states = upload_states(args, isos, cases_list)

    # The results of the next two calls are not used below, but the calls
    # are retained: QuerySet.get() raises if the default (HITRAN 1986)
    # Source row is missing.
    # NOTE(review): confirm whether get_sources() is needed for side effects.
    get_sources(d_refs)
    Source.objects.all().filter(pk=HITRAN1986_SOURCEID).get()

    # now read in and upload the transitions
    cursor = None
    if args.dry_run:
        vprint('[DRY RUN] Uploading transitions ...')
    else:
        cursor = connection.cursor()
        vprint('Uploading transitions ...')

    start_time = time.time()
    ntrans = 0
    # the with block guarantees the file is closed, even on sys.exit
    with open(args.trans_file_upload, 'r') as fi:
        for line in fi:
            # strip the EOL because the last field is par_line
            line = line.rstrip()
            if not line:
                # nothing to parse on a blank line
                continue

            trans = HITRANTransition()
            for prm_name in trans_prms:
                # create and attach the HITRANParam objects
                setattr(trans, prm_name, HITRANParam(None))
            fields = line.split(',')
            for i, output_field in enumerate(trans_fields):
                # set the transition attributes
                trans.set_param(output_field.name, fields[i],
                                output_field.fmt)

            # attach the upper and lower states to the transition
            trans.statep = _lookup_state(trans.stateIDp, first_stateID,
                                         states)
            trans.statepp = _lookup_state(trans.stateIDpp, first_stateID,
                                          states)
            # attach the case_module for this transition's states' quantum
            # numbers
            trans.case_module = hitran_meta.get_case_module(
                                    trans.molec_id, trans.local_iso_id)

            # fetch the right Iso object (local iso IDs start at 1)
            iso = isos[trans.local_iso_id - 1]

            # this_trans is a hitranmeta.Trans object for the MySQL database
            this_trans = Trans(iso=iso, statep=trans.statep,
                               statepp=trans.statepp, nu=trans.nu.val,
                               Sw=trans.Sw.val, A=trans.A.val,
                               multipole=trans.multipole,
                               Elower=trans.Elower, gp=trans.gp,
                               gpp=trans.gpp, valid_from=args.s_mod_date,
                               par_line=trans.par_line)
            ntrans += 1
            if not args.dry_run:
                # if we're really uploading, save the transition to the
                # database
                this_trans.save()

            # create and execute the INSERT strings for the transition's
            # parameters
            for prm_name in trans_prms:
                val = trans.get_param_attr(prm_name, 'val')
                if val is None:
                    # no value for this parameter - move on to the next one
                    continue

                source_id = trans.get_param_attr(prm_name, 'source_id')
                if source_id is None:
                    # if we can't identify source_id, it's missing from the
                    # hitranmeta_refs_map and/or hitranmeta_source tables:
                    # this is fatal and we must exit with an error
                    print('Error! no reference specified for %s' % prm_name)
                    sys.exit(1)

                # column names and matching values for this parameter's row;
                # in a dry run this_trans.id has not been set by save()
                prm_fields = ['trans_id', 'val']
                prm_entries = [str(this_trans.id), str(val)]
                err = trans.get_param_attr(prm_name, 'err')
                if err is not None:
                    prm_fields.append('err')
                    prm_entries.append(str(err))
                prm_fields.extend(['ierr', 'source_id'])
                prm_entries.extend(
                    [str(trans.get_param_attr(prm_name, 'ierr')),
                     str(source_id)])

                # NOTE(review): the values are interpolated straight into
                # the SQL text rather than passed as query parameters; this
                # is only acceptable while the .trans file is trusted.
                s_insert = 'INSERT INTO prm_%s (%s) VALUES (%s)'\
                    % (prm_name.lower(), ', '.join(prm_fields),
                       ', '.join(prm_entries))
                if not args.dry_run:
                    # if we're really uploading, save the prm to the database
                    cursor.execute(s_insert)

    end_time = time.time()
    vprint('%d transitions read in (%s)' % (ntrans,
                timed_at(end_time - start_time)))
def _register_state(state, db_stateIDs, next_stateID, fo_s):
    """
    Resolve the ID for state, registering the state if it is new.

    If the state's string representation is already a key of db_stateIDs,
    the stored ID is returned and nothing else changes. Otherwise the state
    is given the ID next_stateID (also set as state.id), recorded in
    db_stateIDs, and its string representation is written as one line to
    the open file fo_s.

    Returns the tuple (state_id, next_stateID), where next_stateID has been
    incremented if a new ID was consumed.

    """

    str_rep = state.str_rep()
    # dictionary membership: a hash lookup, not a scan of a list of keys
    if str_rep in db_stateIDs:
        return db_stateIDs[str_rep], next_stateID
    state.id = next_stateID
    db_stateIDs[str_rep] = next_stateID
    fo_s.write('%s\n' % str_rep)
    return next_stateID, next_stateID + 1


def _attach_source_ids(trans, molecule, d_refs):
    """
    Set source_id on the parameter objects of trans from the reference
    fields of its par_line, exiting with status 1 if a non-zero reference
    is not a key of d_refs.

    """

    for j, prm_name in enumerate(('nu', 'S', 'gamma_air', 'gamma_self',
                                  'n_air', 'delta_air')):
        # the reference fields of the par_line are at character
        # positions 134-146 of the 160-byte par_line, in 2-character fields
        iref = int(trans.par_line[133+2*j:135+2*j])
        if iref == 0:
            # don't worry about missing 0 refs (which default to the
            # HITRAN 1986 paper)
            source_id = HITRAN1986_SOURCEID
        else:
            # form a HITRAN-style source identifier as
            # <molecule_name>-<prm_name>-<id>, for looking up in d_refs
            sref = '%s-%s-%d' % (molecule.ordinary_formula, prm_name, iref)
            # we can't use '+' in XML attributes, so replace with 'p'
            sref = sref.replace('+', 'p')
            if sref not in d_refs:
                # Oops - missing reference: bail.
                print('missing reference for %s in hitranmeta_refs_map'
                      ' table' % sref)
                sys.exit(1)
            # all's well - we have a valid source_id
            source_id = d_refs[sref].source_id

        if prm_name == 'S':
            # the 'S' reference applies to both Sw and A
            trans.Sw.source_id = source_id
            trans.A.source_id = source_id
            continue
        try:
            getattr(trans, prm_name).source_id = source_id
        except AttributeError:
            # no parameter object exists for prm_name; this can
            # happen if e.g. delta_air=0. and none was created, but
            # it's fine - we just move on
            pass


def parse_par(args, molecule, isos, d_refs):
    """
    Parse the input .par file, args.par_file, into normalized .states and
    .trans files, checking for the existence of the relevant sources and
    not outputing duplicates. All transitions encountered are written to
    the .trans file, even if they're already in the database - duplicate-
    handling is done upon staging the upload.
    NB the input .par file must be in order of increasing wavenumber (an
    error is raised if this is found not to be the case).

    Arguments:
    args: the processed command line arguments; par_file, trans_file,
    states_file, overwrite and global_iso_ids are read here.

    molecule: the Molecule object; its ordinary_formula is used in messages
    and to build the HITRAN-style reference identifiers.

    isos: the Iso objects whose existing states are read from the database.

    d_refs: a dictionary keyed by HITRAN-style refID whose values have a
    source_id attribute.

    The program exits with status 1 if an output file exists and
    args.overwrite is not set, if the wavenumbers are out of order, or if a
    reference is missing from d_refs.

    """

    # get all of the states for this molecule currently in the database
    # as their string representations - these are the keys to the db_stateIDs
    # dictionary, with the corresponding database State ids as their values
    db_stateIDs = {}
    for state in State.objects.filter(iso__in=isos):
        db_stateIDs[state.str_rep()] = state.id
    vprint('%d existing states for %s read in from database'\
                % (len(db_stateIDs), molecule.ordinary_formula))

    vprint('Creating .trans and .states files...')
    vprint('%s\n-> %s\n   %s'\
        % (args.par_file, args.trans_file, args.states_file))

    if not args.overwrite:
        # the .trans and .states files should not already exist
        for filename in (args.trans_file, args.states_file):
            if os.path.exists(filename):
                vprint('File exists:\n%s\nAborting.' % filename, 5)
                sys.exit(1)

    # read the lines and rstrip them of the EOL characters. We don't lstrip
    # because we keep the space in front of molec_ids 1-9
    vprint('reading .par lines from %s ...' % args.par_file)
    with open(args.par_file, 'r') as fi:
        lines = [x.rstrip() for x in fi]
    ntrans = len(lines)
    vprint('%d lines read in' % ntrans)

    # find out the state ID at which we can start adding states
    try:
        first_stateID = State.objects.all().order_by('-id')[0].id + 1
    except IndexError:
        # no states in the database yet, so let's start at 1
        first_stateID = 1
    vprint('new states will be added with ids starting at %d' % first_stateID)

    start_time = time.time()
    stateID = first_stateID
    last_nu = 0.    # the previous wavenumber read in
    # for the progress indicator
    percent_done = 0
    percent_increment = 1

    # the with blocks close both output files even if we sys.exit part-way
    with open(args.states_file, 'w') as fo_s:
        with open(args.trans_file, 'w') as fo_t:
            for i, line in enumerate(lines):
                # progress indicator, as a percentage
                percent = float(i)/ntrans * 100.
                if percent - percent_done > percent_increment:
                    vprint('%d %%' % percent_done, 1)
                    percent_done += percent_increment

                # parse the par_line into a HITRANTransition object
                trans = HITRANTransition.parse_par_line(line)
                if trans is None:
                    # blank or comment line
                    continue

                # check our wavenumbers are in order; it is the input .par
                # file that is at fault if they are not
                if trans.nu.val < last_nu:
                    vprint('Error: %s transitions file isn\'t ordered by'
                           ' nu.' % args.par_file, 5)
                    sys.exit(1)
                last_nu = trans.nu.val

                # set the global (ie database-wide) ID for the isotopologue
                # in the transition and its upper and lower state objects
                trans.global_iso_id = args.global_iso_ids[
                                    (trans.molec_id, trans.local_iso_id)]
                trans.statep.global_iso_id = trans.global_iso_id
                trans.statepp.global_iso_id = trans.global_iso_id

                # resolve the upper state first and then the lower state, so
                # that identical new states share a single ID
                trans.stateIDp, stateID = _register_state(
                                    trans.statep, db_stateIDs, stateID, fo_s)
                trans.stateIDpp, stateID = _register_state(
                                    trans.statepp, db_stateIDs, stateID, fo_s)

                # check that the references for this transition's parameters
                # are in d_refs - if they aren't this is fatal, so we exit
                _attach_source_ids(trans, molecule, d_refs)

                # write the transition to the .trans file, *even if it is
                # already in the database* - this is checked for on upload
                fo_t.write('%s\n' % trans.to_str(trans_fields, ','))

    vprint('%d new or updated states were identified'\
                % (stateID-first_stateID))

    end_time = time.time()
    vprint('%d transitions and %d states in %.1f secs'\
                % (len(lines), len(db_stateIDs), end_time - start_time))
return CaseClass(molec_id=molec_id, local_iso_id=local_iso_id, global_iso_id=global_iso_id, E=state.energy, g=state.g, s_qns=state.s_qns) else: return states[stateID - stateID_offset] stateID_offset = State.objects.all().order_by('-id')[0].id + 1 trans = [] start_time = time.time() line_no = 0 ncorrections = 0 co = open(corrections_file, 'w') for line in open(trans_file, 'r'): line_no += 1 line = line.rstrip() this_trans = HITRANTransition() # create the HITRANParam objects for the trans_prms parameters for prm_name in trans_prms: setattr(this_trans, prm_name, HITRANParam(None)) #eval('this_trans.%s=HITRANParam(None)' % prm_name) fields = line.split(',') for i, output_field in enumerate(trans_fields): this_trans.set_param(output_field.name, fields[i], output_field.fmt) #print this_trans.stateIDp, this_trans.stateIDpp this_trans.statep = get_state(this_trans.stateIDp) this_trans.statepp = get_state(this_trans.stateIDpp) this_trans.case_module = hitran_meta.get_case_module(this_trans.molec_id, this_trans.local_iso_id) # OH (A-X) system is a special case: if this_trans.statep.global_iso_id == 48 and s_qns[15]=='A':
# Make the Django project importable and point Django at its settings before
# the models are imported (hitran_path is defined earlier in the file).
sys.path.append(hitran_path)
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
from hitranmeta.models import *

# Isotopologue abundances keyed by '%2d%1d' % (molecule ID, isotopologue ID),
# which is the form of the first three characters of a par_line used below.
isos = Iso.objects.all()
abundances = dict(('%2d%1d' % (iso.molecule_id, iso.isoID), iso.abundance)
                  for iso in isos)

par_dir = os.path.join(HOME, 'research/HITRAN/HITRAN2008/HITRAN2008/'\
                       'By-Molecule/Uncompressed-files')
par_name = sys.argv[1]
par_path = os.path.join(par_dir, par_name)

# read the lines and rstrip them of the EOL characters; the with block
# closes the file as soon as it has been read
with open(par_path, 'r') as par_file:
    lines = [x.rstrip() for x in par_file]

# Recalculate A for every assigned line and report the lines whose
# 10-character field at par_line[25:35] differs from the recalculated value
# formatted as %10.3E. Q and T are defined elsewhere in the file.
for i, line in enumerate(lines):
    trans = HITRANTransition.parse_par_line(line)
    if trans is None:
        # blank or comment line
        continue
    if trans.gp is None:
        # without gp, A cannot be calculated for this line
        print('unassigned line at %12.6f' % trans.nu.val)
        continue

    A = calc_A(trans.nu.val, trans.Elower, trans.gp, trans.Sw.val, Q, T,
               abundances[trans.par_line[:3]])
    if trans.par_line[25:35] != '%10.3E' % A:
        # parsed value first, recalculated value second
        print('%10.3E %10.3E' % (trans.A.val, A))