Example #1
0
def sample_file(sample, fields, args, api, path=None, session_file=None):
    """Create a CSV file with the sample rows.

    Retrieves the sample resource, writes its rows to the file named in
    ``args.predictions`` and, when ``--stat-field``/``--stat-fields`` were
    requested, dumps the sample's statistical info to ``stat_info.json``
    under ``path``.
    """
    query_string = sample_query_string(args, fields)
    sample = r.get_samples([sample],
                           args,
                           api,
                           session_file=session_file,
                           query_string=query_string)[0][0]
    output = args.predictions
    with UnicodeWriter(output, lineterminator="\n") as output:
        headers = [
            field['name'] for field in sample['object']['sample']['fields']
        ]
        if args.sample_header:
            if args.row_index or args.occurrence:
                new_headers = []
                if args.row_index:
                    new_headers.append("index")
                if args.occurrence:
                    new_headers.append("occurrences")
                # The field headers must always follow the extra
                # index/occurrences columns. Previously this extend was
                # nested inside the ``occurrence`` branch, so using only
                # --row-index dropped the original headers entirely.
                new_headers.extend(headers)
                headers = new_headers
            output.writerow(headers)
        for row in sample['object']['sample']['rows']:
            output.writerow(row)
        if args.stat_field or args.stat_fields:
            # Collect only the statistic keys the sample actually provides.
            sample_obj = sample['object']['sample']
            stat_info = {key: sample_obj[key] for key in STAT_KEYS
                         if key in sample_obj}
            with open(os.path.join(path, "stat_info.json"), "w") as stat_file:
                json.dump(stat_info, stat_file)
Example #2
0
def sample_file(sample, fields, args, api, path=None, session_file=None):
    """Create a CSV file with the sample rows.

    Retrieves the sample resource, writes its rows to the file named in
    ``args.predictions`` and, when ``--stat-field``/``--stat-fields`` were
    requested, dumps the sample's statistical info to ``stat_info.json``
    under ``path``.
    """
    query_string = sample_query_string(args, fields)
    sample = r.get_samples([sample], args, api,
                           session_file=session_file,
                           query_string=query_string)[0][0]
    # Use a context manager so the output file is always closed. The previous
    # ``open(output, 'w', 0)`` requested unbuffered text mode, which raises
    # ValueError on Python 3 and leaked the file handle on Python 2.
    # ``newline=''`` is the documented way to open files for csv.writer.
    with open(args.predictions, 'w', newline='') as output_file:
        output = csv.writer(output_file, lineterminator="\n")
        headers = [field['name']
                   for field in sample['object']['sample']['fields']]
        if args.sample_header:
            if args.row_index or args.occurrence:
                new_headers = []
                if args.row_index:
                    new_headers.append("index")
                if args.occurrence:
                    new_headers.append("occurrences")
                # The field headers must always follow the extra
                # index/occurrences columns. Previously this extend was
                # nested inside the ``occurrence`` branch, so using only
                # --row-index dropped the original headers entirely.
                new_headers.extend(headers)
                headers = new_headers
            output.writerow(headers)
        for row in sample['object']['sample']['rows']:
            output.writerow(row)
        if args.stat_field or args.stat_fields:
            # Collect only the statistic keys the sample actually provides.
            sample_obj = sample['object']['sample']
            stat_info = {key: sample_obj[key] for key in STAT_KEYS
                         if key in sample_obj}
            with open(os.path.join(path, "stat_info.json"), "w") as stat_file:
                json.dump(stat_info, stat_file)
Example #3
0
File: samples.py  Project: mamack/bigmler
def samples_processing(datasets,
                       samples,
                       sample_ids,
                       api,
                       args,
                       resume,
                       session_file=None,
                       path=None,
                       log=None):
    """Create new samples from the input datasets or reuse the given ones.

    Returns the ``(samples, sample_ids, resume)`` triple used downstream.
    """
    if datasets and not (has_samples(args) or args.no_sample):
        # A dataset is available, no sample was given, and creation was not
        # disabled: build a brand-new sample from the dataset.
        sample_ids, samples = [], []

        # Only 1 sample per bigmler command at present
        number_of_samples = 1
        if resume:
            resume, sample_ids = c.checkpoint(
                c.are_samples_created, path, number_of_samples,
                debug=args.debug)
            if not resume:
                u.log_message(
                    u.dated("Found %s samples out of %s. Resuming.\n" %
                            (len(sample_ids), number_of_samples)),
                    log_file=session_file,
                    console=args.verbosity)

            samples = sample_ids
            number_of_samples -= len(sample_ids)

        sample_args = r.set_sample_args(args)
        samples, sample_ids = r.create_samples(
            datasets, samples, sample_args, args, api, path,
            session_file, log)
    elif args.sample:
        # A single sample id was provided explicitly: use it as-is.
        sample_ids = [args.sample]
        samples = sample_ids[:]

    elif args.samples or args.sample_tag:
        samples = sample_ids[:]

    # Retrieve the samples' output so it can be stored as CSV files.
    if sample_ids and needs_sample_fields(args):
        samples, sample_ids = r.get_samples(
            samples, args, api, session_file=session_file)

    return samples, sample_ids, resume
Example #4
0
File: samples.py  Project: bigmlcom/bigmler
def samples_processing(datasets, samples, sample_ids,
                       api, args, resume,
                       session_file=None, path=None,
                       log=None):
    """Create new samples from the input datasets or reuse the given ones.

    Returns the ``(samples, sample_ids, resume)`` triple used downstream.
    """
    must_create = datasets and not (has_samples(args) or args.no_sample)
    if must_create:
        # A dataset is available, no sample was given, and creation was not
        # disabled: build a brand-new sample from the dataset.
        sample_ids = []
        samples = []

        # Only 1 sample per bigmler command at present
        number_of_samples = 1
        if resume:
            resume, sample_ids = c.checkpoint(c.are_samples_created,
                                              path,
                                              number_of_samples,
                                              debug=args.debug)
            if not resume:
                message = u.dated(
                    "Found %s samples out of %s. Resuming.\n"
                    % (len(sample_ids), number_of_samples))
                u.log_message(message,
                              log_file=session_file,
                              console=args.verbosity)

            samples = sample_ids
            number_of_samples -= len(sample_ids)

        sample_args = r.set_sample_args(args)
        samples, sample_ids = r.create_samples(datasets, samples, sample_args,
                                               args, api, path,
                                               session_file, log)
    elif args.sample:
        # A single sample id was provided explicitly: use it as-is.
        sample_ids = [args.sample]
        samples = sample_ids[:]

    elif args.samples or args.sample_tag:
        samples = sample_ids[:]

    # Retrieve the samples' output so it can be stored as CSV files.
    if sample_ids and needs_sample_fields(args):
        samples, sample_ids = r.get_samples(samples, args, api,
                                            session_file=session_file)

    return samples, sample_ids, resume