def sample_file(sample, fields, args, api, path=None, session_file=None):
    """Write one sample's rows to a CSV file.

    Retrieves the sample from the API, filtered by the query string built
    from the command-line args, and dumps its rows into the predictions
    file — optionally preceded by a header line that can carry "index"
    and/or "occurrences" columns. When any stat option is set, the
    sample's statistical info is also stored as JSON under ``path``.
    """
    query_string = sample_query_string(args, fields)
    sample = r.get_samples([sample], args, api, session_file=session_file,
                           query_string=query_string)[0][0]
    sample_info = sample['object']['sample']
    with UnicodeWriter(args.predictions, lineterminator="\n") as writer:
        column_names = [field['name'] for field in sample_info['fields']]
        if args.sample_header:
            # Optional extra columns precede the field names.
            prefix = []
            if args.row_index:
                prefix.append("index")
            if args.occurrence:
                prefix.append("occurrences")
            writer.writerow(prefix + column_names)
        for row in sample_info['rows']:
            writer.writerow(row)
    if args.stat_field or args.stat_fields:
        # Persist the requested statistical keys alongside the CSV output.
        stat_info = {key: sample_info[key] for key in STAT_KEYS
                     if key in sample_info}
        with open(os.path.join(path, "stat_info.json"), "w") as stat_file:
            json.dump(stat_info, stat_file)
def sample_file(sample, fields, args, api, path=None, session_file=None):
    """Creates a file for each sample with the sample rows.

    Fetches the sample (filtered by the query string derived from args)
    and writes its rows as CSV to the predictions file, with an optional
    header line that may include "index" and "occurrences" columns.
    When stat options are set, the sample's statistical info keys are
    dumped to ``stat_info.json`` under ``path``.
    """
    query_string = sample_query_string(args, fields)
    sample = r.get_samples([sample], args, api, session_file=session_file,
                           query_string=query_string)[0][0]
    # Bug fix: the original ``open(output, 'w', 0)`` requested unbuffered
    # text I/O, which raises ValueError on Python 3, and the handle was
    # never closed. Use a context manager; newline="" is the mode the csv
    # module documents for writer file objects.
    with open(args.predictions, "w", newline="") as handle:
        output = csv.writer(handle, lineterminator="\n")
        headers = [field['name']
                   for field in sample['object']['sample']['fields']]
        if args.sample_header:
            if args.row_index or args.occurrence:
                # Prepend the extra bookkeeping columns to the field names.
                new_headers = []
                if args.row_index:
                    new_headers.append("index")
                if args.occurrence:
                    new_headers.append("occurrences")
                new_headers.extend(headers)
                headers = new_headers
            output.writerow(headers)
        for row in sample['object']['sample']['rows']:
            output.writerow(row)
    if args.stat_field or args.stat_fields:
        # Collect only the stat keys actually present in the sample object.
        stat_info = {}
        sample_obj = sample['object']['sample']
        for key in STAT_KEYS:
            if key in sample_obj:
                stat_info[key] = sample_obj[key]
        with open(os.path.join(path, "stat_info.json"), "w") as stat_file:
            json.dump(stat_info, stat_file)
def samples_processing(datasets, samples, sample_ids, api, args, resume,
                       session_file=None, path=None, log=None):
    """Create new samples or reuse the ones supplied.

    When datasets are available and no sample was given (and creation is
    not disabled via ``no_sample``), a sample is created — resuming from
    a checkpoint when requested. Otherwise the sample ids given on the
    command line (or selected by tag) are used as-is. The sample objects
    are fetched when later steps need their fields.
    """
    if datasets and not (has_samples(args) or args.no_sample):
        samples, sample_ids = [], []
        # Only 1 sample per bigmler command at present
        expected = 1
        if resume:
            resume, sample_ids = c.checkpoint(c.are_samples_created, path,
                                              expected, debug=args.debug)
            if not resume:
                u.log_message(
                    u.dated("Found %s samples out of %s. Resuming.\n"
                            % (len(sample_ids), expected)),
                    log_file=session_file, console=args.verbosity)
                samples = sample_ids
                expected -= len(sample_ids)
        sample_args = r.set_sample_args(args)
        samples, sample_ids = r.create_samples(datasets, samples, sample_args,
                                               args, api, path, session_file,
                                               log)
    elif args.sample:
        # A single sample id was provided directly.
        sample_ids = [args.sample]
        samples = sample_ids[:]
    elif args.samples or args.sample_tag:
        samples = sample_ids[:]
    # We must retrieve the samples' output to store them as CSV files.
    if sample_ids and needs_sample_fields(args):
        samples, sample_ids = r.get_samples(samples, args, api,
                                            session_file=session_file)
    return samples, sample_ids, resume
def samples_processing(datasets, samples, sample_ids, api, args, resume,
                       session_file=None, path=None, log=None):
    """Create samples from the datasets, or pick up the ones supplied.

    Returns the ``(samples, sample_ids, resume)`` triple. Sample objects
    are additionally retrieved when downstream steps need their fields.
    """
    should_create = (datasets and not has_samples(args)
                     and not args.no_sample)
    if should_create:
        sample_ids = []
        samples = []
        # bigmler currently creates a single sample per command.
        total = 1
        if resume:
            resume, sample_ids = c.checkpoint(
                c.are_samples_created, path, total, debug=args.debug)
            if not resume:
                found_msg = u.dated("Found %s samples out of %s. Resuming.\n"
                                    % (len(sample_ids), total))
                u.log_message(found_msg, log_file=session_file,
                              console=args.verbosity)
                samples = sample_ids
                total -= len(sample_ids)
        samples, sample_ids = r.create_samples(
            datasets, samples, r.set_sample_args(args), args, api, path,
            session_file, log)
    elif args.sample:
        # Use the single sample id provided on the command line.
        sample_ids = [args.sample]
        samples = list(sample_ids)
    elif args.samples or args.sample_tag:
        samples = list(sample_ids)
    if sample_ids and needs_sample_fields(args):
        # Fetch the sample objects so their contents can be stored as CSV.
        samples, sample_ids = r.get_samples(
            samples, args, api, session_file=session_file)
    return samples, sample_ids, resume