def collect_epochs_by_site(self, epoch_groups, ts):
    """Map source labels to the epochs of the epoch group stored under ``ts``.

    Every epoch of ``epoch_groups[ts]`` is visited, and for each epoch every
    one of its input sources. A source contributes an entry only when its
    label is found in ``self.df.Site.base``. When several epochs share a
    label, the epoch visited last is the one kept.

    :param epoch_groups: mapping indexed by ``ts``; the selected value must
        provide ``getEpochs()``
    :param ts: key selecting the epoch group to scan
    :return: dict of source label -> epoch
    """
    by_site = {}
    for epoch in iterable(epoch_groups[ts].getEpochs()):
        input_sources = to_dict(epoch.getInputSources())
        for source in input_sources.values():
            # Skip sources whose label is not one of the data frame's sites.
            if source.getLabel() not in self.df.Site.base:
                continue
            by_site[source.getLabel()] = epoch
    return by_site
def to_dict_should_convert_flat_map():
    """Check ``to_dict`` on a flat map holding a string and two boxed numbers.

    The map is built with ``Maps.newHashMap()``, converted with ``to_dict``
    and the result is compared against the source map via ``check_dict``.
    """
    java_map = Maps.newHashMap()
    java_map.put('key1', 'value1')

    # Boxed java.lang numbers, so the map holds non-string values as well.
    boxed_int = autoclass("java.lang.Integer")(2)
    java_map.put('key2', boxed_int)
    boxed_double = autoclass("java.lang.Double")(2.5)
    java_map.put('key3', boxed_double)

    converted = to_dict(java_map)
    check_dict(converted, java_map)
def should_tag_site_with_species(self):
    """Check that parentless input sources are tagged with known species.

    For every epoch of every epoch group of ``self.expt``, each input source
    that has no parent sources must carry at least one tag, and each of its
    tags must appear in ``self.df.Species``.
    """
    known_species = set(self.df.Species)
    for epoch_group in iterable(self.expt.getEpochGroups()):
        for epoch in iterable(epoch_group.getEpochs()):
            input_sources = to_dict(epoch.getInputSources())
            for source in input_sources.values():
                parents = list(iterable(source.getParentSources()))
                if parents:
                    # Only sources without parents are checked.
                    continue

                source_tags = set(iterable(source.getAllTags()))
                assert source_tags
                for tag in source_tags:
                    assert tag in known_species
def _import_file(context, container, protocol, file_name, header_row, timezone,
                 first_measurement_column_number, date_column):
    """Import the measurements of one CSV file into ``container``.

    The file is loaded with ``read_csv`` and its rows are grouped by
    (row index, ``Site``). For each group the function

    * looks up the Source for the site (missing sites are inserted into
      ``context`` up front),
    * reuses or inserts the EpochGroup registered under the group's
      timestamp,
    * reuses or inserts the Epoch registered under the site's label, and
    * walks the group's rows: the site Source is tagged with the row's
      species, then measurements are inserted either against the site
      itself (``MEASUREMENT_TYPE_SITE``) or against a newly inserted
      individual Source (``MEASUREMENT_TYPE_INDIVIDUAL``). Rows of any other
      type are tagged only.

    :param context: provides ``getTopLevelSources()`` and ``insertSource()``
    :param container: provides ``getEpochGroups()`` and ``insertEpochGroup()``
    :param protocol: passed to every inserted EpochGroup, Epoch and
        individual Source
    :param file_name: forwarded to ``read_csv``
    :param header_row: forwarded to ``read_csv``
    :param timezone: forwarded to ``_make_day_ends`` with each group's
        timestamp
    :param first_measurement_column_number: position of the first column
        treated as a measurement; all columns from there on are used
    :param date_column: forwarded to ``read_csv``
    :return: always 0
    """
    df = read_csv(file_name, header_row=header_row, date_column=date_column)

    sites = _sites_by_label(context, df.Site)

    # Group EpochData by (index, Site), i.e. (Date, Site)
    epoch_data = df.groupby([df.index, 'Site'])

    groups = _epoch_groups_by_start(container)

    for (group_index, group) in epoch_data:
        logging.info("Adding data for CSV group" + str(group_index))

        # Get the Source object corresponding to this site
        plot_name = group_index[1]
        plot = sites[plot_name]
        ts = group_index[0]
        start, end = _make_day_ends(ts, timezone)

        # One EpochGroup per day
        if ts not in groups:
            group_name = "{}-{}-{}".format(start.getYear(),
                                           start.getMonthOfYear(),
                                           start.getDayOfMonth())
            print("Adding EpochGroup {}".format(group_name))
            # No protocol, params, or deviceParams
            groups[ts] = container.insertEpochGroup(group_name, start, protocol, None, None)
        epoch_group = groups[ts]

        # Epoch by site
        epochs = _epochs_by_source_label(epoch_group)
        if plot_name not in epochs:
            print("Inserting Epoch for measurements at: {}".format(plot_name))
            epochs[plot_name] = epoch_group.insertEpoch(start, end, protocol, None, None)
        epoch = epochs[plot_name]

        # The group keeps the CSV's index labels, so rows are addressed by
        # position with .iloc (as the measurement slice below already does)
        # instead of relying on [] falling back to positions.
        for i in range(len(group)):
            species = group['Species'].iloc[i]
            observer = group['Observer'].iloc[i]
            row_type = group['Type'].iloc[i]

            print(" {}".format(species))

            # Tag the Source with the species found there
            _add_tag_with_retry(plot, species)

            measurements = group.iloc[i, first_measurement_column_number:].dropna()

            if row_type == MEASUREMENT_TYPE_SITE:
                epoch.addInputSource(plot_name, plot)

                srcNames = Sets.newHashSet()
                srcNames.add(plot_name)

                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)

            elif row_type == MEASUREMENT_TYPE_INDIVIDUAL:
                individual = plot.insertSource(
                    epoch_group,
                    start,
                    end,
                    protocol,
                    Maps.newHashMap(),
                    Optional.absent(),
                    u"{} {}".format(species, i + 1),
                    u"{}-{}-{}-{}".format(species, plot_name, start.toString(), i + 1),
                )

                epoch.addInputSource(individual.getLabel(), individual)
                srcNames = Sets.newHashSet()
                srcNames.add(individual.getLabel())
                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)
                epoch.addTag('individual')

    return 0


def _sites_by_label(context, site_names):
    """Return {label: Source} for top-level sources, inserting missing sites."""
    # Organize sources; this should be replaced with getSourceWithName() or a query
    sites = {}
    for src in iterable(context.getTopLevelSources()):
        sites[src.getLabel()] = src

    for plot in site_names:
        if plot not in sites:
            logging.info("Adding site " + plot)
            sites[plot] = context.insertSource(plot, plot)  # TODO better name?
    return sites


def _epoch_groups_by_start(container):
    """Return {pd.Timestamp: EpochGroup} keyed by each group's start fields."""
    groups = {}
    for grp in iterable(container.getEpochGroups()):
        d = grp.getStart()
        ts = pd.Timestamp(datetime.datetime(d.getYear(),
                                            d.getMonthOfYear(),
                                            d.getDayOfMonth(),
                                            d.getHourOfDay(),
                                            d.getMinuteOfHour(),
                                            d.getSecondOfMinute()))
        groups[ts] = grp
    return groups


def _epochs_by_source_label(epoch_group):
    """Return {input source label: Epoch} for the epochs of ``epoch_group``."""
    epochs = {}
    for epoch in iterable(epoch_group.getEpochs()):
        src_map = to_dict(epoch.getInputSources())
        for src in src_map.values():
            epochs[src.getLabel()] = epoch
    return epochs


def _add_tag_with_retry(source, tag):
    """Add ``tag`` to ``source``, retrying once after a JavaException."""
    try:
        source.addTag(tag)
    except JavaException:
        logging.error("Exception adding tag. Retrying...")
        # A failure of the second attempt propagates to the caller.
        source.addTag(tag)
        logging.info("Successfully added tag on second try")
def _import_file(context, container, protocol, file_name, header_row, timezone,
                 first_measurement_column_number, date_column):
    """Load a measurement CSV and store its rows as Epochs in ``container``.

    Rows returned by ``read_csv`` are grouped by (row index, ``Site``). Each
    group is attached to the EpochGroup registered under the group's
    timestamp and to the Epoch registered under the site's label; both are
    inserted when absent, as are Sources for sites that ``context`` does not
    yet list as top-level sources.

    For every row of a group the site Source is tagged with the row's
    species (one retry on ``JavaException``). Rows of type
    ``MEASUREMENT_TYPE_SITE`` have their measurements inserted against the
    site; rows of type ``MEASUREMENT_TYPE_INDIVIDUAL`` get a new individual
    Source under the site, and the Epoch is tagged ``'individual'``. Rows of
    any other type receive no measurements.

    :param context: provides ``getTopLevelSources()`` and ``insertSource()``
    :param container: provides ``getEpochGroups()`` and ``insertEpochGroup()``
    :param protocol: handed to each inserted EpochGroup, Epoch and
        individual Source
    :param file_name: forwarded to ``read_csv``
    :param header_row: forwarded to ``read_csv``
    :param timezone: forwarded to ``_make_day_ends``
    :param first_measurement_column_number: position of the first
        measurement column; every column from there to the end is read
    :param date_column: forwarded to ``read_csv``
    :return: always 0
    """
    df = read_csv(file_name, header_row=header_row, date_column=date_column)

    # Organize sources; this should be replaced with getSourceWithName() or a query
    sites = {}
    for src in iterable(context.getTopLevelSources()):
        sites[src.getLabel()] = src

    for plot in df.Site:
        if plot not in sites:
            logging.info("Adding site " + plot)
            sites[plot] = context.insertSource(plot, plot)  # TODO better name?

    # Group EpochData by (index, Site), i.e. (Date, Site)
    epoch_data = df.groupby([df.index, 'Site'])

    # Existing EpochGroups, keyed by a Timestamp built from their start fields
    groups = {}
    for grp in iterable(container.getEpochGroups()):
        d = grp.getStart()
        started = datetime.datetime(d.getYear(),
                                    d.getMonthOfYear(),
                                    d.getDayOfMonth(),
                                    d.getHourOfDay(),
                                    d.getMinuteOfHour(),
                                    d.getSecondOfMinute())
        groups[pd.Timestamp(started)] = grp

    for (group_index, group) in epoch_data:
        logging.info("Adding data for CSV group" + str(group_index))

        # Get the Source object corresponding to this site
        plot_name = group_index[1]
        plot = sites[plot_name]
        ts = group_index[0]
        start, end = _make_day_ends(ts, timezone)

        # One EpochGroup per day
        if ts not in groups:
            group_name = "{}-{}-{}".format(start.getYear(),
                                           start.getMonthOfYear(),
                                           start.getDayOfMonth())
            print("Adding EpochGroup {}".format(group_name))
            groups[ts] = container.insertEpochGroup(
                group_name, start, protocol, None,
                None)  # No protocol, params, or deviceParams
        epoch_group = groups[ts]

        # Epoch by site
        epochs = {}
        for existing in iterable(epoch_group.getEpochs()):
            src_map = to_dict(existing.getInputSources())
            for src in src_map.values():
                epochs[src.getLabel()] = existing

        if plot_name not in epochs:
            print("Inserting Epoch for measurements at: {}".format(plot_name))
            epochs[plot_name] = epoch_group.insertEpoch(
                start, end, protocol, None, None)
        epoch = epochs[plot_name]

        # Rows are addressed by position: the group still carries the CSV's
        # index labels, so an integer passed to plain [] is not guaranteed
        # to mean "i-th row". .iloc matches the measurement slice below.
        species_column = group['Species']
        observer_column = group['Observer']
        type_column = group['Type']

        for i in range(len(group)):
            species = species_column.iloc[i]
            observer = observer_column.iloc[i]
            measurement_type = type_column.iloc[i]

            print(" {}".format(species))

            # Tag the Source with the species found there
            try:
                plot.addTag(species)
            except JavaException:
                logging.error("Exception adding tag. Retrying...")
                # Second failure is not caught and aborts the import.
                plot.addTag(species)
                logging.info("Successfully added tag on second try")

            measurements = group.iloc[
                i, first_measurement_column_number:].dropna()

            if measurement_type == MEASUREMENT_TYPE_SITE:
                epoch.addInputSource(plot_name, plot)

                srcNames = Sets.newHashSet()
                srcNames.add(plot_name)

                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)

            elif measurement_type == MEASUREMENT_TYPE_INDIVIDUAL:
                # Row numbers in the labels are 1-based.
                individual = plot.insertSource(
                    epoch_group,
                    start,
                    end,
                    protocol,
                    Maps.newHashMap(),
                    Optional.absent(),
                    u"{} {}".format(species, i + 1),
                    u"{}-{}-{}-{}".format(species, plot_name,
                                          start.toString(), i + 1),
                )

                epoch.addInputSource(individual.getLabel(), individual)
                srcNames = Sets.newHashSet()
                srcNames.add(individual.getLabel())
                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)
                epoch.addTag('individual')

    return 0
def should_set_device_parameters(self):
    """Check that the stored device details use the keys of ``device_info``.

    The parent of ``self.epoch_group`` is converted with
    ``asclass("Experiment", ...)``; the keys of its equipment setup's device
    details (after ``to_dict``) are compared to ``self.device_info.keys()``.
    """
    expected_keys = self.device_info.keys()

    experiment = asclass("Experiment", self.epoch_group.getParent())
    device_details = experiment.getEquipmentSetup().getDeviceDetails()
    actual_keys = to_dict(device_details).keys()

    assert_equals(expected_keys, actual_keys)