def main():
    database = args.experiment + "_samples_raw"
    db_args = {
        'dbhost': args.dbhost,
        'dbport': args.dbport,
        'database': database,
        'dbuser': None,
        'dbpassword': None,
    }
    pp_db = data.PostProcessingDatabase(db_args)

    log.info("Opening %s output files for resampling jobs", args.parallelism)
    num_files = int(args.parallelism)

    # open one shell script per unit of parallelism; commands are dealt to
    # the scripts round-robin below
    file_list = []
    base_name = "resamplejob-" + args.experiment + "-"
    for i in range(num_files):
        filename = ''
        if args.jobdirectory is not None:
            filename = args.jobdirectory + "/"
        filename += base_name + str(uuid.uuid4()) + ".sh"
        log.debug("job file: %s", filename)
        f = open(filename, 'w')
        f.write("#!/bin/sh\n\n")
        file_list.append(f)
    file_cycle = itertools.cycle(file_list)

    for file in os.listdir(args.inputdirectory):
        if fnmatch.fnmatch(file, '*.txt'):
            full_fname = args.inputdirectory + "/" + file
            cmd = generate_sample_command(args.experiment, full_fname,
                                          args.outputdirectory,
                                          args.samplesize, args.dropthreshold)
            fc = file_cycle.next()
            log.debug("cmd: %s", cmd)
            fc.write(cmd)
            fc.write('\n')

    for fh in file_list:
        fh.close()
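# A minimal standalone sketch of the round-robin dealing pattern used above:
# itertools.cycle() repeats the open job files forever, so writing one command
# per .next() call spreads the commands evenly across the N scripts. The
# worker names and job count here are made up for illustration.

import itertools

def roundrobin_demo():
    workers = ["job-a.sh", "job-b.sh", "job-c.sh"]
    dealer = itertools.cycle(workers)
    # seven jobs dealt to three workers: a, b, c, a, b, c, a
    return [(job, dealer.next()) for job in range(7)]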
def main():
    database = args.experiment + "_samples_raw"
    db_args = {
        'dbhost': args.dbhost,
        'dbport': args.dbport,
        'database': database,
        'dbuser': None,
        'dbpassword': None,
    }
    pp_db = data.PostProcessingDatabase(db_args)

    log.info("Opening %s output files for data export jobs", args.parallelism)
    num_files = int(args.parallelism)

    file_list = []
    base_name = "exportjob-" + args.experiment + "-"
    for i in range(num_files):
        filename = ''
        if args.jobdirectory is not None:
            filename = args.jobdirectory + "/"
        filename += base_name + str(uuid.uuid4()) + ".sh"
        log.debug("job file: %s", filename)
        f = open(filename, 'w')
        f.write("#!/bin/sh\n\n")
        file_list.append(f)
    file_cycle = itertools.cycle(file_list)

    # read the list of simulation run IDs to export, one per line
    with open(args.simidfile, 'r') as simid_file:
        for s in simid_file:
            sim_id = s.strip()  # drop the trailing newline so it cannot split the shell command
            cmd = generate_export_commandline(args.experiment,
                                              args.outputdirectory, sim_id)
            fc = file_cycle.next()
            log.debug("cmd: %s", cmd)
            fc.write(cmd)
            fc.write('\n')

    for fh in file_list:
        fh.close()
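# generate_export_commandline() is defined elsewhere in this script; a sketch
# of the general shape such a helper might take is below. The script name and
# option flags here are illustrative assumptions, not the actual interface.

def generate_export_commandline_sketch(experiment, outputdirectory, sim_id):
    # assemble a single shell command line for one simulation run's export
    cmd = "python export_simulation_data.py"      # hypothetical script name
    cmd += " --experiment " + experiment          # hypothetical flag
    cmd += " --outputdirectory " + outputdirectory
    cmd += " --simid " + sim_id
    return cmd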
def doExport():
    database = args.experiment + "_samples_raw"
    db_args = {
        'dbhost': args.dbhost,
        'dbport': args.dbport,
        'database': database,
        'dbuser': None,
        'dbpassword': None,
    }
    pp_db = data.PostProcessingDatabase(db_args)
    sm_db = data.SimulationMetadataDatabase(db_args)
    nm_db = data.NetworkModelDatabase(db_args)

    data_repository_file = args.experiment + "-full-data.csv"

    # The approach is to start at the end of the pipeline and walk back toward
    # the beginning: start with a SeriationCT SeriationAnnotationData object
    # and get the IDSS seriation parameters. BUT to know the column headers,
    # we have to either accumulate all the data in memory first, or peek at
    # which columns exist in the network model database so that we can then
    # process a row at a time.
    nmodel = data.NetworkModelDatabase.objects()[0]
    params = nmodel.model_parameters
    annotated_obj = data.SeriationAnnotationData.objects

    # Get the post-processing step parameters by walking explicitly backward:
    # use the seriation input file to get the filtered info, the filtered info
    # to get the assemblage sampling, and so on. After the post-processing
    # steps, use the simulation run ID to get the simulation parameters, then
    # the network model info and network model parameters.
    with open(data_repository_file, 'wb') as csvfile:
        fields = get_csv_header()
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
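# get_csv_header() is defined elsewhere; given the backward walk described
# above, one plausible sketch is a header built from fixed identifier columns
# plus one column per network model parameter, discovered by peeking at the
# network model database rather than accumulating all rows in memory. The
# field names and the parameter argument here are illustrative assumptions.

def get_csv_header_sketch(model_parameters):
    fields = ["simulation_run_id", "seriation_input_file"]  # hypothetical
    # one column per network model parameter name
    fields.extend(sorted(model_parameters.keys()))
    return fields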
        simulation_run_id=sim_id).first()
    networkmodel = sim_run.networkmodel
    return networkmodel


if __name__ == "__main__":
    setup()
    database = args.experiment + "_samples_raw"
    db_args = {
        'dbhost': args.dbhost,
        'dbport': args.dbport,
        'database': database,
        'dbuser': None,
        'dbpassword': None,
    }
    pp_db = data.PostProcessingDatabase(db_args)
    sm_db = data.SimulationMetadataDatabase(db_args)

    full_fname = args.inputfile
    root = parse_filename_into_root(full_fname)
    networkmodel = get_networkmodel_for_input(full_fname)

    # we use the actual TemporalNetwork to recover the time-slice structure
    netmodel = TemporalNetwork(networkmodel_path=networkmodel, sim_length=1000)
    time_map = netmodel.get_subpopulation_slice_ids()
    # log.debug("assemblage time_map: %s", time_map)

    # get the list of assemblages sorted by origin time: sort the
    # (name, time) pairs by time, then keep just the names
    sorted_assemblage_names = [name for name, t in
                               sorted(time_map.items(),
                                      key=operator.itemgetter(1))]
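# A worked example of the origin-time sort above, assuming (as the surrounding
# code does) that get_subpopulation_slice_ids() maps assemblage names to
# numeric origin times; the names and times here are made up for illustration.

import operator

def origin_time_sort_demo():
    time_map = {"assem-c": 300, "assem-a": 100, "assem-b": 200}
    # sort the (name, time) pairs by time, keep just the names
    return [name for name, t in sorted(time_map.items(),
                                       key=operator.itemgetter(1))]
    # -> ["assem-a", "assem-b", "assem-c"]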
def main():
    database = args.experiment + "_samples_raw"
    db_args = {
        'dbhost': args.dbhost,
        'dbport': args.dbport,
        'database': database,
        'dbuser': None,
        'dbpassword': None,
    }
    pp_db = data.PostProcessingDatabase(db_args)

    log.info("Opening %s output files for seriation configuration", args.parallelism)
    num_files = int(args.parallelism)

    file_list = []
    base_name = "seriationjob-" + args.experiment + "-"
    for i in range(num_files):
        filename = ''
        if args.jobdirectory is not None:
            filename = args.jobdirectory + "/"
        filename += base_name + str(uuid.uuid4()) + ".sh"
        log.debug("job file: %s", filename)
        f = open(filename, 'w')
        f.write("#!/bin/sh\n\n")
        file_list.append(f)
    file_cycle = itertools.cycle(file_list)

    # get the list of input files from the database
    seriations = data.SeriationInputData.objects
    for s in seriations:
        input_file = s.seriation_input_file
        log.info("Processing input file: %s", input_file)
        root = parse_filename_into_root(input_file)
        outdir = os.getcwd() + '/' + args.outputdirectory + "/" + root
        try:
            os.mkdir(outdir)
        except OSError:
            # the per-seriation output directory may already exist
            pass
        cmd = generate_seriation_commandline(input_file, outdir,
                                             s.xy_file_path, database,
                                             s.source_identifier)
        fc = file_cycle.next()
        log.debug("cmd: %s", cmd)
        fc.write(cmd)
        fc.write('\n')

    for fh in file_list:
        fh.close()
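# parse_filename_into_root() is defined elsewhere; the per-seriation output
# directories built above suggest it reduces a path to a bare file stem. A
# minimal sketch under that assumption:

import os

def parse_filename_into_root_sketch(path):
    # e.g. "/data/exp1/seriation-abc123.txt" -> "seriation-abc123"
    return os.path.splitext(os.path.basename(path))[0]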
def main():
    database = args.experiment + "_samples_raw"
    db_args = {
        'dbhost': args.dbhost,
        'dbport': args.dbport,
        'database': database,
        'dbuser': None,
        'dbpassword': None,
    }
    pp_db = data.PostProcessingDatabase(db_args)

    log.info("Opening %s output files for assemblage sampling jobs", args.parallelism)
    num_files = int(args.parallelism)

    file_list = []
    base_name = "assemsamplejob-" + args.experiment + "-"
    for i in range(num_files):
        filename = ''
        if args.jobdirectory is not None:
            filename = args.jobdirectory + "/"
        filename += base_name + str(uuid.uuid4()) + ".sh"
        log.debug("job file: %s", filename)
        f = open(filename, 'w')
        f.write("#!/bin/sh\n\n")
        file_list.append(f)
    file_cycle = itertools.cycle(file_list)

    for file in os.listdir(args.inputdirectory):
        if fnmatch.fnmatch(file, '*.txt'):
            full_fname = args.inputdirectory + "/" + file
            if args.sampletype == 'random':
                cmd = generate_random_sample_command(full_fname)
            elif args.sampletype == 'spatial':
                cmd = generate_spatial_sample_command(full_fname)
            elif args.sampletype == 'temporal':
                cmd = generate_temporal_sample_command(full_fname)
            elif args.sampletype == 'spatiotemporal':
                cmd = generate_spatiotemporal_sample_command(full_fname)
            elif args.sampletype == 'complete':
                cmd = generate_complete_sample_command(full_fname)
            elif args.sampletype == 'excludelist':
                cmd = generate_exclusion_sample_command(full_fname)
            elif args.sampletype == 'slicestratified':
                cmd = generate_slicestratified_sample_command(full_fname)
            else:
                log.error("sampletype %s not recognized, fatal error", args.sampletype)
                sys.exit(1)
            fc = file_cycle.next()
            log.debug("cmd: %s", cmd)
            fc.write(cmd)
            fc.write('\n')

    for fh in file_list:
        fh.close()
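# The if/elif chain above maps each sampletype to exactly one generator
# function; a dispatch table expresses the same mapping as data, using only
# the helpers already referenced in this script:

SAMPLE_COMMAND_GENERATORS = {
    'random': generate_random_sample_command,
    'spatial': generate_spatial_sample_command,
    'temporal': generate_temporal_sample_command,
    'spatiotemporal': generate_spatiotemporal_sample_command,
    'complete': generate_complete_sample_command,
    'excludelist': generate_exclusion_sample_command,
    'slicestratified': generate_slicestratified_sample_command,
}
# usage: cmd = SAMPLE_COMMAND_GENERATORS[args.sampletype](full_fname)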
def doExport():
    database = args.experiment + "_samples_raw"
    db_args = {
        'dbhost': args.dbhost,
        'dbport': args.dbport,
        'database': database,
        'dbuser': None,
        'dbpassword': None,
    }
    pp_db = data.PostProcessingDatabase(db_args)

    # the data cache has the following nested dict structure:
    #   simid -> replicate -> subpop -> class: count
    cmap = DeepDefaultDict()
    sim_id_clean = args.simid[9:]  # drop the fixed 9-character prefix from the sim ID

    cursor = data.ClassFrequencySampleUnaveraged.m.find(
        dict({'simulation_run_id': args.simid}), dict(timeout=False))
    for sample in cursor:
        rep = sample["replication"]
        subpop = sample["subpop"]
        class_count_map = sample["class_count"]
        for cls, count in class_count_map.items():
            cmap[rep][subpop][cls] += count

    # conditional: either we sample trait counts (which will reduce the list
    # of traits we put in the header), or we output the full list of counts
    # (which will put every trait in the header)
    class_set = set()
    for rep in cmap.keys():
        for subpop in cmap[rep].keys():
            for cls, count in cmap[rep][subpop].items():
                class_set.add(cls)
    log.info("total number of classes: %s", len(class_set))

    for rep in cmap.keys():
        outputfile = args.outputdirectory + "/" + sim_id_clean + "-" + str(rep) + ".txt"
        # rebuild the class set per replicate so each file's header matches
        # exactly the classes observed in that replicate
        class_set = set()
        with open(outputfile, 'wb') as outfile:
            for sp in cmap[rep].keys():
                for cls in cmap[rep][sp].keys():
                    class_set.add(cls)
            class_list = list(class_set)

            # write header row
            header = "Assemblage_Name"
            for cls in class_list:
                header += "\t" + cls
            header += "\n"
            outfile.write(header)

            # one row per subpopulation; a class missing from a subpop resolves
            # to an empty node in the DeepDefaultDict, so write 0 in that case
            for sp in cmap[rep].keys():
                row = sp
                for cls in class_list:
                    row += "\t"
                    count = cmap[rep][sp][cls]
                    row += str(int(count)) if count != {} else str(0)
                row += "\n"
                outfile.write(row)
        pp_db.store_exported_datafile(args.simid, outputfile)
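# DeepDefaultDict is imported from elsewhere in this codebase; the behavior
# relied on above (arbitrary nesting without setup, untouched leaves comparing
# equal to {}, and += working on a first write) is consistent with a
# recursively-nesting defaultdict whose empty nodes act like zero under
# addition. A minimal sketch of that idea, as an assumption about the real
# class rather than its actual implementation:

from collections import defaultdict

class DeepDefaultDictSketch(defaultdict):
    # every missing key yields another nested node, so lookups like
    # d[rep][subpop][cls] need no explicit setup
    def __init__(self):
        defaultdict.__init__(self, DeepDefaultDictSketch)

    def __add__(self, other):
        # an untouched (empty) node added to a number acts like zero, which
        # is what lets `cmap[rep][subpop][cls] += count` work above
        return other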