def check_threads(machine_file, input_file):
    """Work out how mpifxcorr processes are assigned to hosts.

    Reads the MPI machine file and the DiFX .threads file and returns a
    dict with keys:
      n_processes  -- total number of MPI processes (manager + datastreams
                      + sum of the per-core thread counts)
      manager      -- host that runs the FXManager
      datastreams  -- hosts running the datastream processes
      cores        -- hosts running core processes
      threads      -- threads per core (parallel to `cores`)

    Raises RuntimeError if the .threads file is truncated.
    """
    # For our datastreams there is simply one process per line in the
    # machine file, so we could simply use that to work out the number of
    # processes; however this is more thorough.
    log.info('checking how processes are assigned')
    # Set n_processes to 1 to account for the master (FXManager).
    n_processes = 1
    active_datastreams = int(get_parameter('ACTIVE DATASTREAMS', input_file))
    n_processes += active_datastreams

    thread_file = get_parameter('CORE CONF FILENAME', input_file)
    cores = []
    threads = []
    # Use context managers so both files are closed even when the
    # .threads file turns out to be faulty (the old code leaked them).
    with open(machine_file, 'r') as mf:
        # First line of the machine file names the manager host.
        manager = next(mf).strip()
        # The next active_datastreams lines name the datastream hosts.
        datastreams = []
        for i in range(active_datastreams):
            datastreams.append(next(mf).strip())
        with open(thread_file, 'r') as tf:
            # First line of the threads file gives the number of cores;
            # each following line is the thread count for one core,
            # paired with the next host in the machine file.
            ncores = int(next(tf).split(':')[1].strip())
            for i in range(ncores):
                try:
                    threads.append(int(next(tf)))
                except StopIteration:
                    raise RuntimeError('faulty .threads file')
                try:
                    cores.append(next(mf).strip())
                except StopIteration:
                    # Tolerated: cores may end up shorter than threads.
                    log.warning('warning, mismatch in number of cores and number of machines in machine file')
    n_processes += sum(threads)

    log.info('machine file ' + machine_file)
    log.info('thread file ' + thread_file)
    log.info('n_processes ' + str(n_processes))
    log.info('manager ' + manager)
    for i in range(active_datastreams):
        log.info('datastream ' + str(i + 1).rjust(4) + ' ' + datastreams[i])
    # NOTE(review): if the machine file ran short above, cores[i] can
    # raise IndexError here, as in the original -- confirm intended.
    for i in range(ncores):
        log.info('core ' + str(i + 1).rjust(4) + ' ' + cores[i].rjust(2) +
                 ' ' + str(threads[i]).rjust(2) + ' thread(s)')
    return {'n_processes': n_processes,
            'manager': manager,
            'datastreams': datastreams,
            'cores': cores,
            'threads': threads}
def run_mpifxcorr(rootname, np, mpi=None, machine_file=None,
                  mpifxcorr=None, input_file=None, timeout=None):
    """Run mpifxcorr under MPI for job `rootname` with `np` processes.

    mpi, mpifxcorr and timeout default to the values in the `observation`
    module; machine_file and input_file default to rootname + '.machine'
    and rootname + '.input'.

    Raises RuntimeError (via check_outfile) if the output file exists.
    """
    # Only derive the paths when the caller did not supply them; the
    # previous version unconditionally overwrote both parameters.
    if machine_file is None:
        machine_file = os.path.abspath(rootname + '.machine')
    if input_file is None:
        input_file = os.path.abspath(rootname + '.input')
    if mpi is None:
        mpi = observation.mpi
    if mpifxcorr is None:
        # was: correlator = observation.mpifxcorr -- the default was
        # bound to the wrong name and never reached the command line.
        mpifxcorr = observation.mpifxcorr
    if timeout is None:
        timeout = observation.mpifxcorr_timeout
    # Refuse to clobber an existing correlator output file.
    check_outfile(input_file)
    import difxlog as log
    command = (mpi + ' -nolocal -np ' + str(np) +
               ' -machinefile ' + machine_file + ' ' +
               mpifxcorr + ' ' + input_file)
    exec_time = get_parameter('EXECUTE TIME (SEC)', input_file)
    # Honour the timeout argument instead of a hard-coded 120 s.
    spawn(command, defreg, spawn_func, (time(), exec_time), timeout=timeout)
    log.info('Correlator Finished')
def machine_gen(machines, machine_file, input_file):
    """Generate an MPI machine file.

    machines is the cluster description returned by parse_clusterfile()
    (entries of the form [host, ncores, nthreads, ndatastreams, ...]);
    it is necessary to run parse_clusterfile first.

    Returns (nstreams, ncores).
    Raises RuntimeError if the machine file cannot be opened or the
    cluster file does not provide enough datastream nodes.
    """
    nstreams = int(get_parameter('ACTIVE DATASTREAMS', input_file))
    try:
        machinefile = open(machine_file, 'w')
    except IOError:
        # was: raise (RuntimeError, msg) -- raising a tuple loses the
        # message; also restore the missing space before the filename.
        raise RuntimeError("Can't open machine file " + machine_file)
    try:
        # Manager runs on the first host.
        machinefile.write(machines[0][0] + '\n')
        # Datastreams: one line per active datastream; entry[3] flags
        # whether the host can take a datastream.
        for i in range(nstreams):
            if machines[i][3] == 0:
                raise RuntimeError("not enough datastreams in cluster file")
            machinefile.write(machines[i][0] + '\n')
        # Cores: one line per core on each host; total them up as we go.
        ncores = 0
        for entry in machines:
            ncores += entry[1]
            for j in range(entry[1]):
                machinefile.write(entry[0] + '\n')
    finally:
        # The original never closed the file.
        machinefile.close()
    return nstreams, ncores
def calc_gen(input_file, obscode, job_id=None, increment=None,
             spectral_average=None, taper_function=None):
    """Generate the header (first part) of a .calc file.

    Reads the timing parameters from input_file and returns a list of
    [keyword, value] pairs ready to be written out. job_id, increment,
    spectral_average and taper_function default to the values in the
    `observation` module.
    """
    print('writing calc header from input file')
    if job_id is None:
        job_id = observation.job_id
    if increment is None:
        increment = observation.increment
    if spectral_average is None:
        spectral_average = observation.spectral_average
    if taper_function is None:
        taper_function = observation.taper_function
    execute_time, start_mjd, start_seconds, telescope_entries = \
        get_parameter(('EXECUTE TIME (SEC)', 'START MJD',
                       'START SECONDS', 'TELESCOPE ENTRIES'), input_file)
    # Convert the start time to fractional-day form for the job window.
    # NOTE(review): the arithmetic below assumes get_parameter returns
    # numeric values for these keys -- confirm against its implementation.
    start_fraction = start_seconds / 86400.
    job_start_time = str(start_mjd + start_fraction)
    job_end_time = str(start_mjd + start_fraction + (execute_time / 86400.))
    start_year, start_month, start_day = mjd2ymd(start_mjd)
    start_hour, start_minute, start_second = df2hms(start_fraction)
    return [['JOB ID', job_id],
            ['JOB START TIME', str(job_start_time)],
            ['JOB END TIME', str(job_end_time)],
            ['OBSCODE', obscode],
            ['START MJD', str(start_mjd)],
            ['START YEAR', str(start_year)],
            ['START MONTH', str(start_month)],
            ['START DAY', str(start_day)],
            ['START HOUR', str(start_hour)],
            ['START MINUTE', str(start_minute)],
            ['START SECOND', str(start_second)],
            ['INCREMENT (SECS)', increment],
            ['SPECTRAL AVG', spectral_average],
            ['TAPER FUNCTION', taper_function]]
def thread_gen(input_file, ncores, machines):
    """Generate a .threads file.

    input_file is the correlator input file (provides CORE CONF FILENAME);
    ncores is the total number of cores;
    machines is the cluster description returned by parse_clusterfile()
    (entries of the form [host, ncores, nthreads, ...]).

    Raises RuntimeError if the threads file cannot be opened.
    """
    threadfilename = get_parameter('CORE CONF FILENAME', input_file)
    try:
        threadfile = open(threadfilename, 'w')
    except IOError:
        # was: raise (RuntimeError, "Can't open machine file" +
        # machine_file) -- machine_file is undefined here (NameError),
        # and this is the threads file, not the machine file.
        raise RuntimeError("Can't open threads file " + threadfilename)
    try:
        threadfile.write('NUMBER OF CORES: ' + str(ncores) + '\n')
        # One line per core process on each host, giving its thread count.
        for entry in machines:
            for j in range(entry[1]):
                threadfile.write(str(entry[2]) + '\n')
    finally:
        # The original never closed the file.
        threadfile.close()
def add_clock(logs, input_file, starttime=None):
    """Replace the clock parameters in input_file.

    One (intercept, rate) pair is derived from each station log; when
    starttime is not given it is reconstructed from the START MJD and
    START SECONDS entries of the input file.
    """
    if starttime is None:
        mjd, startsecs = get_parameter(('START MJD', 'START SECONDS'),
                                       input_file)
        starttime = mjd2datetime(int(mjd) + float(startsecs) / 86400.)
    # Collect [name, value] clock parameter pairs, one set per log.
    clock_params = []
    for index, station_log in enumerate(logs):
        intercept, rate = log2delay(station_log, starttime)
        clock_params += clock_parameters(intercept, rate, index)
    names = [pair[0] for pair in clock_params]
    vals = [pair[1] for pair in clock_params]
    set_parameter(names, vals, input_file)
def run_mpifxcorr(rootname, np, mpi=None, machine_file=None,
                  mpifxcorr=None, input_file=None, timeout=None):
    """Run mpifxcorr under MPI for job `rootname` with `np` processes.

    mpi, mpifxcorr and timeout default to the values in the `observation`
    module; machine_file and input_file default to rootname + '.machine'
    and rootname + '.input'. Logs the main correlation parameters before
    spawning, then blocks until the correlator finishes or times out.

    Raises RuntimeError (via check_outfile) if the output file exists.
    """
    # Derive the file paths from rootname when not supplied explicitly;
    # these lines were commented out, so calling with the None defaults
    # crashed when building the command string.
    if machine_file is None:
        machine_file = os.path.abspath(rootname + '.machine')
    if input_file is None:
        input_file = os.path.abspath(rootname + '.input')
    if mpi is None:
        mpi = observation.mpi
    if mpifxcorr is None:
        mpifxcorr = observation.mpifxcorr
    if timeout is None:
        timeout = observation.mpifxcorr_timeout
    # Refuse to clobber an existing correlator output file.
    check_outfile(input_file)
    import difxlog as log
    # ' -x LD_LIBRARY_PATH' +\  (kept for reference: sometimes needed)
    command = (mpi + ' -np ' + str(np) +
               ' -machinefile ' + machine_file + ' ' +
               ' -byslot ' +
               mpifxcorr + ' ' + input_file)
    exec_time = get_parameter('EXECUTE TIME (SEC)', input_file)
    int_time = float(get_parameter('INT TIME (SEC)', input_file))
    # Log the parameters that dominate correlator memory/throughput.
    log.info('Int time = ' + str(int_time))
    log.info('num channels = ' + get_parameter('NUM CHANNELS', input_file))
    log.info('Vis Buffer Length = ' +
             get_parameter('VIS BUFFER LENGTH', input_file))
    log.info('Blocks Per Send = ' +
             get_parameter('BLOCKS PER SEND', input_file))
    log.info('Data Buffer Factor = ' +
             get_parameter('DATA BUFFER FACTOR', input_file))
    log.info('num Data Segments = ' +
             get_parameter('NUM DATA SEGMENTS', input_file))
    spawn(command, defreg, mpifxcorrSpawnClass,
          (time(), exec_time, int_time), timeout=timeout)
    log.info('Correlator Finished')
def get_input(self, parameter, occurrence=0):
    """Return `parameter` from this job's .input file.

    occurrence selects which instance of a repeated keyword to read.
    """
    # was missing the return -- the looked-up value was silently dropped.
    return get_parameter(parameter, self.input, occurrence)
def get_calc(self, parameter, occurrence=0):
    """Return `parameter` from this job's .calc file.

    occurrence selects which instance of a repeated keyword to read.
    """
    # was missing the return -- the looked-up value was silently dropped.
    return get_parameter(parameter, self.calc, occurrence)
def check_outfile(input_file):
    """Abort if the correlator output file named in input_file exists.

    Logs an error and raises RuntimeError rather than letting mpifxcorr
    overwrite previous results.
    """
    target = get_parameter('OUTPUT FILENAME', input_file)
    if not os.path.exists(target):
        return
    log.error('Output file ' + target + ' exists.')
    raise RuntimeError("Output file exists.")
timeout = None

# read arguments
rootname = args[0]
machine_file = rootname + '.machine'
input_file = rootname + '.input'
# Default process count comes from the machine/threads files.
np = check_threads(machine_file, input_file)['n_processes']
mpipath = observation.mpi
mpifxcorr = observation.mpifxcorr
for o, a in opts:
    if o in ("-t", "--timeout"):
        timeout = int(a)
    elif o == "--mpi":
        mpipath = a
    elif o == "--mpifxcorr":
        # was: mpifxcorrpath = a -- assigned to a name that was never
        # read, so the --mpifxcorr option silently had no effect.
        mpifxcorr = a
    elif o == "--np":
        np = int(a)
    elif o in ("-d", "--delete"):
        # Remove a pre-existing output file so the run can proceed.
        output_file = get_parameter('OUTPUT FILENAME', input_file)
        if os.path.exists(output_file):
            log.warning('removing ' + output_file)
            rmtree(output_file)

run_mpifxcorr(rootname, np, mpipath, machine_file, mpifxcorr, input_file,
              timeout)

if __name__ == "__main__":
    main()
def check_threads(machine_file, input_file):
    """Work out how MPI processes will be distributed over the hosts.

    Parses an OpenMPI-style machine file (``host [slots=N] [max_slots=N]``,
    '#' starts a comment), assigns the FXManager to the first available
    host and one process per datastream round-robin over the hosts, and
    logs the resulting layout; remaining capacity is left for cores.

    Returns the total number of available MPI slots.
    Raises RuntimeError if there are not enough slots for the FXManager
    plus all active datastreams.
    """
    log.info('checking how processes are assigned')
    active_datastreams = int(get_parameter('ACTIVE DATASTREAMS', input_file))
    # Only the thread file *name* is needed (for logging); the original
    # opened the file and never read or closed it.
    thread_file = get_parameter('CORE CONF FILENAME', input_file)

    # Parse the machine file into dicts: {'host': ..., 'slots': n, ...}.
    mftable = []
    with open(machine_file, 'r') as mf:
        for line in mf:
            # Strip comments and skip blank lines (the previous version
            # raised IndexError on whitespace-only lines).
            fields = line.split('#')[0].split()
            if not fields:
                continue
            mfline = {'host': fields[0]}
            for keyval in fields[1:]:
                key, value = keyval.split('=')
                mfline[key] = int(value)
            mftable.append(mfline)

    # Total up the usable slots; max_slots takes precedence over slots,
    # and a bare hostname counts as one slot. (Collapses the three
    # duplicated branches of the original.)
    np = 0
    nodes = []
    for host in mftable:
        nslots = host.get('max_slots', host.get('slots', 1))
        np += nslots
        if nslots > 0:
            nodes.append({'host': host['host'], 'processes': [],
                          'np': nslots, 'ncores': nslots})

    # Need one slot for the manager on top of the datastreams.
    if not np > active_datastreams:
        raise RuntimeError(
            "Not enough available slots to run FXManager and datastreams")

    manager = None
    datastreams = []
    datastreams_assigned = 0
    while datastreams_assigned < active_datastreams:
        for node in nodes:
            if not manager:
                # Runs only once, on the first node.
                manager = node['host']
                node['ncores'] -= 1
                node['processes'].append('FXManager')
            elif datastreams_assigned < active_datastreams:
                datastreams_assigned += 1
                datastreams.append(node['host'])
                node['ncores'] -= 1
                node['processes'].append('datastream ' +
                                         str(datastreams_assigned))

    log.info('Machine file ' + machine_file)
    log.info('Thread file ' + thread_file)
    log.info('np ' + str(np))
    log.info('Hosts:')
    for node in nodes:
        log.info("    " + node['host'] + ':')
        for process in node['processes']:
            log.info("        " + process)
        if node['ncores'] > 0:
            log.info("        + " + str(node['ncores']) + " core(s)")
    log.info('')
    return np