예제 #1
0
def new2016Dataset(include_robot_and_aerial=True, include_harvest=True):
  """Build the 2016 dataset from '2016.merged.csv'.

  Args:
    include_robot_and_aerial: When true, the 'ROBOT_' and 'AERIAL_' selectors
      are added to the input-label filter.
    include_harvest: When true, the 'HARVEST_' and 'SYNTHETIC_HARVEST_'
      selectors are added to the input-label filter.

  Returns:
    A dataset_lib.Dataset built from the samples, the selected input labels,
    and an ordered mapping of output label -> output generator.
  """
  samples = csv_utils.read_csv_as_dicts('2016.merged.csv')

  # Labels whose values get converted in place before the dataset is built.
  # NOTE(review): filter_2016_labels presumably matches labels by prefix;
  # confirm against its definition.
  numeric_selectors = (
      'HARVEST_', 'COMPOSITION_', 'ROBOT_', 'AERIAL_', 'SYNTHETIC_', 'GPS_',
      'ROW', 'COLUMN')
  numeric_labels = filter_2016_labels(numeric_selectors)
  dataset_lib.convert_to_float_or_missing(samples, numeric_labels)

  # GPS_ and ACCESSION_ are always inputs; the rest are opt-in.
  input_selectors = ['GPS_', 'ACCESSION_']
  if include_robot_and_aerial:
    input_selectors += ['ROBOT_', 'AERIAL_']
  if include_harvest:
    input_selectors += ['HARVEST_', 'SYNTHETIC_HARVEST_']
  input_labels = filter_2016_labels(tuple(input_selectors))

  # One generator per COMPOSITION_ label, kept in sorted label order.
  output_labels = filter_2016_labels('COMPOSITION_')
  labelled_generators = [
      (label, create_2016_output_generator(label)) for label in output_labels]
  output_generators = collections.OrderedDict(sorted(labelled_generators))

  print('2016 Inputs: ' + ','.join(input_labels))
  return dataset_lib.Dataset(samples, input_labels, output_generators)
예제 #2
0
 def test_read_csv_as_dicts_good_case(self):
     """A well-formed CSV is read as one dict per data row, keyed by header."""
     csv_lines = ['label1,label2,label3', '1,2,3', '4,5,6']
     file_path = self._create_test_csv(csv_lines)

     # Values are expected to stay as strings; no numeric conversion.
     expected_rows = [
         {'label1': '1', 'label2': '2', 'label3': '3'},
         {'label1': '4', 'label2': '5', 'label3': '6'},
     ]
     actual_rows = csv_utils.read_csv_as_dicts(file_path)
     self.assertListEqual(expected_rows, actual_rows)
예제 #3
0
def main():
  """Write the pairwise correlation matrix of selected features to a CSV.

  Reads samples from INPUT_PATH, converts the selected feature columns via
  dataset.convert_to_float_or_missing, computes np.corrcoef across features
  (columns of the sample matrix), and writes the result to OUTPUT_PATH with
  feature names as both the header row and the first column.
  """
  samples = csv_utils.read_csv_as_dicts(INPUT_PATH)
  features_to_use = filter_labels(('ROW', 'COLUMN', 'GPS_', 'ROBOT_',
                                   'HARVEST_', 'SYNTHETIC_', 'COMPOSITION_'))
  dataset.convert_to_float_or_missing(samples, features_to_use)

  # One row per sample, one column per feature.
  X = np.array([[sample[x] for x in features_to_use] for sample in samples])

  # rowvar=False: each column (feature) is a variable, each row an observation.
  results = np.corrcoef(X, rowvar=False)

  # newline='' is required by the csv module; without it the writer's own
  # '\r\n' terminators get translated again and produce blank rows on Windows.
  with open(OUTPUT_PATH, 'w', newline='') as f:
    writer = csv.writer(f)
    # list(...) so the header works even if filter_labels returns a tuple.
    writer.writerow(['feature_name'] + list(features_to_use))
    for feature_name, results_row in zip(features_to_use, results):
      writer.writerow([feature_name] + list(results_row))
예제 #4
0
def main():
    """Run the spatial-correlation jobs for each spatial key combination.

    For every entry in mantel_runs, get_spatial_correlation is mapped over the
    selected features in a worker pool, the results are sorted by their last
    element (the p-value), and written to a per-run CSV file whose path comes
    from get_output_path.
    """
    samples = read_csv_as_dicts(INPUT_PATH)

    # Tuples of (spatial_key1, spatial_key2, spatial_keys_description).
    mantel_runs = [
        (Features.GPS_EASTINGS, Features.GPS_NORTHINGS,
         'eastings_and_northings'),
        (Features.GPS_EASTINGS, None, 'eastings_only'),
        (Features.GPS_NORTHINGS, None, 'northings_only'),
        (Features.ROW, Features.COLUMN, 'plot_row_and_column'),
        (Features.ROW, None, 'plot_row_only'),
        (Features.COLUMN, None, 'plot_column_only'),
    ]

    # The set of features to test does not depend on the spatial keys, so it
    # is computed once rather than once per run.
    # This is a bit hacky way to skip values that are all text values.
    # But it works nicely right now.
    skipped_features = (Features.ROW, Features.COLUMN, Features.PLANT_ID)
    features_to_test = []
    for feature in Features:
        if feature in skipped_features:
            continue
        if feature == Features.GPS_EASTINGS:
            break  # Ignore this Feature and all Features after this one.
        features_to_test.append(feature)

    # The context manager terminates the worker processes on exit, including
    # when a job raises, so workers are never leaked.
    with Pool() as pool:
        for spatial_key1, spatial_key2, spatial_keys_description in mantel_runs:
            args = [(samples, spatial_key1, spatial_key2, feature)
                    for feature in features_to_test]

            output_path = get_output_path(spatial_keys_description)
            print('Spawning jobs for:', output_path)
            results = pool.map(get_spatial_correlation, args)
            results.sort(key=lambda x: x[-1])  # Sort by p-value.

            # newline='' is required by the csv module to avoid blank rows
            # on platforms that translate line endings.
            with open(output_path, 'w', newline='') as f:
                csv_writer = csv.writer(f)
                csv_writer.writerow([
                    'label', 'num_data_points', 'avg_data_value',
                    'avg_diff_between_adjacent_plots',
                    'avg_diff_between_nonadjacent_plots', 'corr_coeff',
                    'p_value'
                ])
                csv_writer.writerows(results)
예제 #5
0
def new2014Dataset():
  """Build the 2014 dataset from '2014/2014_Pheotypic_Data_FileS2.csv'.

  Returns:
    A dataset_lib.Dataset built from the samples, a fixed tuple of input
    labels, and an ordered mapping of output name -> generator. Each
    generator calls get_2014_weight with the dry-weight column and one
    composition column (plus a `minus` column for 'c6' and 'c5').
  """
  samples = csv_utils.read_csv_as_dicts('2014/2014_Pheotypic_Data_FileS2.csv')

  # Column names used by the output generators.
  dry_weight_col = 'Dry weight (kg)'
  adf_col = 'ADF (% DM)'
  ndf_col = 'NDF (% DM)'
  nfc_col = 'NFC (% DM)'
  lignin_col = 'Lignin (% DM)'

  # 'Dry weight (kg)' and 'Dry tons per acre' are deliberately left out of
  # the inputs: including dry weight greatly increases predictive ability.
  input_labels = (
      'Anthesis date (days)',
      'Harvest date (days)',
      'Total fresh weight (kg)',
      'Brix (maturity)',
      'Brix (milk)',
      'Stalk height (cm)',
  )

  numeric_labels = list(input_labels)
  numeric_labels.extend([adf_col, ndf_col, nfc_col, lignin_col,
                         dry_weight_col])
  dataset_lib.convert_to_float_or_missing(samples, numeric_labels)

  # Insertion order defines the output order.
  output_generators = collections.OrderedDict()
  output_generators['adf'] = (
      lambda sample: get_2014_weight(sample, dry_weight_col, adf_col))
  output_generators['ndf'] = (
      lambda sample: get_2014_weight(sample, dry_weight_col, ndf_col))
  output_generators['nfc'] = (
      lambda sample: get_2014_weight(sample, dry_weight_col, nfc_col))
  output_generators['lignin'] = (
      lambda sample: get_2014_weight(sample, dry_weight_col, lignin_col))
  # NOTE(review): `minus` presumably subtracts that column's share before
  # weighting (c6 = ADF - lignin, c5 = NDF - ADF); confirm in get_2014_weight.
  output_generators['c6'] = (
      lambda sample: get_2014_weight(sample, dry_weight_col, adf_col,
                                     minus=lignin_col))
  output_generators['c5'] = (
      lambda sample: get_2014_weight(sample, dry_weight_col, ndf_col,
                                     minus=adf_col))

  print('2014 Inputs: ' + ','.join(input_labels))
  return dataset_lib.Dataset(samples, input_labels, output_generators)
예제 #6
0
파일: visualize.py 프로젝트: bparr/dap
def main():
    """Scatter-plot one label's values at each sample's GPS position.

    Command-line flags:
      -f/--file:  path to the input CSV file (required).
      -l/--label: name of the column to visualize (required).

    Samples whose value for the label is the empty string are dropped. The
    x/y coordinates come from the GPS eastings/northings columns, and every
    plotted value is passed through average_mismatch first.
    """
    parser = argparse.ArgumentParser(description='Visualize features')
    parser.add_argument(
        '-f', '--file', required=True, help='Path to input file.')
    parser.add_argument(
        '-l', '--label', required=True,
        help='Label name in input file to visualize.')
    args = parser.parse_args()

    samples = [
        sample for sample in read_csv_as_dicts(args.file)
        if sample[args.label] != ''
    ]

    def column(key):
        # Values of one CSV column across all kept samples.
        return [average_mismatch(sample[key]) for sample in samples]

    rows = column(Features.GPS_EASTINGS.value)
    columns = column(Features.GPS_NORTHINGS.value)
    values = column(args.label)

    plt.title('Visualization of {} in {}'.format(
        args.label, os.path.basename(args.file)))

    # Custom marker: a closed rectangle path, wider (x) than it is tall (y).
    half_width = 3
    half_height = 2
    rectangle = [
        (-half_width, -half_height),
        (half_width, -half_height),
        (half_width, half_height),
        (-half_width, half_height),
        (-half_width, -half_height),
    ]
    plt.scatter(rows, columns, c=values, s=100, marker=rectangle)

    colorbar = plt.colorbar()
    colorbar.ax.set_title('Value')
    plt.xlabel("Row")
    plt.ylabel("Range")
    plt.show()