def insert_measurements(epoch, group, i, measurements, plot_name, species,
                        srcNames, start, observer):
    """Write one row's measurements to a temporary CSV and attach it to `epoch`.

    The CSV has a single data column, named after the value in the
    'Counting' column of row `i` of `group`, and an index column labelled
    "Measurement". The file's URL is handed to `epoch.insertMeasurement`
    with content type 'text/csv', and the returned measurement gets an
    'Observer' property.

    Parameters:
        epoch: object providing `insertMeasurement(...)`.
        group: DataFrame-like object with a 'Counting' column.
        i: positional (0-based) row number within `group`.
        measurements: values written as the single data column of the CSV.
        plot_name: site name; used (with '#' removed) in the temp file name.
        species: passed as the first argument of `insertMeasurement`.
        srcNames: passed as the second argument of `insertMeasurement`.
        start: object providing getYear(), getMonthOfYear(), getDayOfMonth();
            used only to build the temp file name.
        observer: stored, via str(), as the 'Observer' property.

    Returns:
        None.
    """
    file_prefix = "{}-{}-{}-{}".format(start.getYear(),
                                       start.getMonthOfYear(),
                                       start.getDayOfMonth(),
                                       plot_name.replace('#', ''))

    # Only a unique path is needed from NamedTemporaryFile. Close the handle
    # right away so it is not leaked and so the path can be reopened for
    # writing on platforms that forbid opening an already-open temp file.
    # delete=False keeps the file on disk after the handle is closed.
    tmp = tempfile.NamedTemporaryFile(prefix=file_prefix,
                                      suffix=".csv",
                                      delete=False)
    tmp.close()
    csv_path = tmp.name

    # `i` is a row position, so use .iloc rather than label-based [] lookup.
    column_name = group['Counting'].iloc[i]
    temp_data_frame = pd.DataFrame({column_name: measurements})
    temp_data_frame.to_csv(csv_path, index_label="Measurement")

    # The file is intentionally not removed here: the measurement refers to
    # it by URL (presumably it is read after this call returns - confirm).
    m = epoch.insertMeasurement(species,
                                srcNames,
                                Sets.newHashSet(),
                                File(csv_path).toURI().toURL(),
                                'text/csv')

    # NOTE(review): the reason for this one-second pause is not visible in
    # this file; it is kept exactly as in the original.
    time.sleep(1.0)
    m.addProperty('Observer', str(observer))
def insert_measurements(epoch, group, i, measurements, plot_name, species,
                        srcNames, start, observer):
    """Write one row's measurements to a temporary CSV and attach it to `epoch`.

    The CSV has a single data column, named after the value in the
    'Counting' column of row `i` of `group`, and an index column labelled
    "Measurement". The file's URL is handed to `epoch.insertMeasurement`
    with content type 'text/csv', and the returned measurement gets an
    'Observer' property.

    Parameters:
        epoch: object providing `insertMeasurement(...)`.
        group: DataFrame-like object with a 'Counting' column.
        i: positional (0-based) row number within `group`.
        measurements: values written as the single data column of the CSV.
        plot_name: site name; used (with '#' removed) in the temp file name.
        species: passed as the first argument of `insertMeasurement`.
        srcNames: passed as the second argument of `insertMeasurement`.
        start: object providing getYear(), getMonthOfYear(), getDayOfMonth();
            used only to build the temp file name.
        observer: stored, via str(), as the 'Observer' property.

    Returns:
        None.
    """
    file_prefix = "{}-{}-{}-{}".format(start.getYear(),
                                       start.getMonthOfYear(),
                                       start.getDayOfMonth(),
                                       plot_name.replace('#', ''))

    # Only a unique path is needed from NamedTemporaryFile. Close the handle
    # right away so it is not leaked and so the path can be reopened for
    # writing on platforms that forbid opening an already-open temp file.
    # delete=False keeps the file on disk after the handle is closed.
    tmp = tempfile.NamedTemporaryFile(prefix=file_prefix,
                                      suffix=".csv",
                                      delete=False)
    tmp.close()
    csv_path = tmp.name

    # `i` is a row position, so use .iloc rather than label-based [] lookup.
    column_name = group['Counting'].iloc[i]
    temp_data_frame = pd.DataFrame({column_name: measurements})
    temp_data_frame.to_csv(csv_path, index_label="Measurement")

    # The file is intentionally not removed here: the measurement refers to
    # it by URL (presumably it is read after this call returns - confirm).
    m = epoch.insertMeasurement(species,
                                srcNames,
                                Sets.newHashSet(),
                                File(csv_path).toURI().toURL(),
                                'text/csv')

    # NOTE(review): the reason for this one-second pause is not visible in
    # this file; it is kept exactly as in the original.
    time.sleep(1.0)
    m.addProperty('Observer', str(observer))
def _import_file(context, container, protocol, file_name, header_row,
                 timezone, first_measurement_column_number, date_column):
    """Import the rows of one CSV file into `container`.

    Rows are grouped by (index value, 'Site'). For each group the function
    makes sure there is a top-level source for the site, an epoch group for
    the index value, and an epoch for the site inside that epoch group, and
    then inserts one measurement per row via `insert_measurements`.

    Parameters:
        context: object providing getTopLevelSources() and insertSource().
        container: object providing getEpochGroups() and insertEpochGroup().
        protocol: passed through to insertEpochGroup, insertEpoch and
            insertSource.
        file_name: path handed to `read_csv`.
        header_row: forwarded to `read_csv`.
        timezone: forwarded to `_make_day_ends`.
        first_measurement_column_number: positional index of the first
            column holding measurement values; that column and everything
            to its right is treated as measurement data.
        date_column: forwarded to `read_csv`.

    Returns:
        0, always.
    """
    df = read_csv(file_name, header_row=header_row, date_column=date_column)

    # Organize sources; this should be replaced with getSourceWithName() or
    # a query
    sites = {}
    for src in iterable(context.getTopLevelSources()):
        sites[src.getLabel()] = src

    for plot in df.Site:
        if plot not in sites:
            logging.info("Adding site " + plot)
            sites[plot] = context.insertSource(plot, plot)  # TODO better name?

    # Group EpochData by (index, Site), i.e. (Date, Site)
    epoch_data = df.groupby([df.index, 'Site'])

    # Existing epoch groups keyed by their start, rebuilt as a naive
    # pandas Timestamp so it can be compared with the CSV index values.
    groups = {}
    for grp in iterable(container.getEpochGroups()):
        d = grp.getStart()
        ts = pd.Timestamp(datetime.datetime(d.getYear(),
                                            d.getMonthOfYear(),
                                            d.getDayOfMonth(),
                                            d.getHourOfDay(),
                                            d.getMinuteOfHour(),
                                            d.getSecondOfMinute()))
        groups[ts] = grp

    for (group_index, group) in epoch_data:
        logging.info("Adding data for CSV group" + str(group_index))

        # Get the Source object corresponding to this site
        plot_name = group_index[1]
        plot = sites[plot_name]
        ts = group_index[0]
        start, end = _make_day_ends(ts, timezone)

        # One EpochGroup per day
        if ts not in groups:
            group_name = "{}-{}-{}".format(start.getYear(),
                                           start.getMonthOfYear(),
                                           start.getDayOfMonth())
            print("Adding EpochGroup {}".format(group_name))
            # The two trailing None arguments are presumably the protocol
            # parameters and device parameters - confirm against the API.
            groups[ts] = container.insertEpochGroup(group_name, start,
                                                    protocol, None, None)

        epoch_group = groups[ts]

        # Epoch by site: map every input-source label to its epoch
        epochs = {}
        for epoch in iterable(epoch_group.getEpochs()):
            src_map = to_dict(epoch.getInputSources())
            for src in src_map.values():
                epochs[src.getLabel()] = epoch

        if plot_name not in epochs:
            print("Inserting Epoch for measurements at: {}".format(plot_name))
            epochs[plot_name] = epoch_group.insertEpoch(start, end, protocol,
                                                        None, None)

        epoch = epochs[plot_name]

        # Look the columns up once. Rows are addressed by position (.iloc)
        # throughout, matching the measurement slice below; plain [] with an
        # integer is label-based whenever the index holds integers.
        species_column = group['Species']
        observer_column = group['Observer']
        type_column = group['Type']

        for i in range(len(group)):
            species = species_column.iloc[i]
            observer = observer_column.iloc[i]
            print(" {}".format(species))

            # Tag the Source with the species found there. A failed attempt
            # is retried exactly once; a second failure propagates.
            try:
                plot.addTag(species)
            except JavaException:
                logging.error("Exception adding tag. Retrying...")
                plot.addTag(species)
                logging.info("Successfully added tag on second try")

            measurements = group.iloc[
                i, first_measurement_column_number:].dropna()

            # Rows whose type is neither SITE nor INDIVIDUAL only get the
            # species tag above; no measurement is inserted for them.
            row_type = type_column.iloc[i]
            if row_type == MEASUREMENT_TYPE_SITE:
                epoch.addInputSource(plot_name, plot)
                srcNames = Sets.newHashSet()
                srcNames.add(plot_name)
                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)
            elif row_type == MEASUREMENT_TYPE_INDIVIDUAL:
                # Each individual becomes a child source of the site,
                # labelled "<species> <row number>".
                individual = plot.insertSource(
                    epoch_group,
                    start,
                    end,
                    protocol,
                    Maps.newHashMap(),
                    Optional.absent(),
                    u"{} {}".format(species, i + 1),
                    u"{}-{}-{}-{}".format(species, plot_name,
                                          start.toString(), i + 1),
                )
                epoch.addInputSource(individual.getLabel(), individual)
                srcNames = Sets.newHashSet()
                srcNames.add(individual.getLabel())
                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)
                epoch.addTag('individual')

    return 0
def _import_file(context, container, protocol, file_name, header_row,
                 timezone, first_measurement_column_number, date_column):
    """Import the rows of one CSV file into `container`.

    Rows are grouped by (index value, 'Site'). For each group the function
    makes sure there is a top-level source for the site, an epoch group for
    the index value, and an epoch for the site inside that epoch group, and
    then inserts one measurement per row via `insert_measurements`.

    Parameters:
        context: object providing getTopLevelSources() and insertSource().
        container: object providing getEpochGroups() and insertEpochGroup().
        protocol: passed through to insertEpochGroup, insertEpoch and
            insertSource.
        file_name: path handed to `read_csv`.
        header_row: forwarded to `read_csv`.
        timezone: forwarded to `_make_day_ends`.
        first_measurement_column_number: positional index of the first
            column holding measurement values; that column and everything
            to its right is treated as measurement data.
        date_column: forwarded to `read_csv`.

    Returns:
        0, always.
    """
    df = read_csv(file_name, header_row=header_row, date_column=date_column)

    # Organize sources; this should be replaced with getSourceWithName() or
    # a query
    sites = {}
    for src in iterable(context.getTopLevelSources()):
        sites[src.getLabel()] = src

    for plot in df.Site:
        if plot not in sites:
            logging.info("Adding site " + plot)
            sites[plot] = context.insertSource(plot, plot)  # TODO better name?

    # Group EpochData by (index, Site), i.e. (Date, Site)
    epoch_data = df.groupby([df.index, 'Site'])

    # Existing epoch groups keyed by their start, rebuilt as a naive
    # pandas Timestamp so it can be compared with the CSV index values.
    groups = {}
    for grp in iterable(container.getEpochGroups()):
        d = grp.getStart()
        ts = pd.Timestamp(datetime.datetime(d.getYear(),
                                            d.getMonthOfYear(),
                                            d.getDayOfMonth(),
                                            d.getHourOfDay(),
                                            d.getMinuteOfHour(),
                                            d.getSecondOfMinute()))
        groups[ts] = grp

    for (group_index, group) in epoch_data:
        logging.info("Adding data for CSV group" + str(group_index))

        # Get the Source object corresponding to this site
        plot_name = group_index[1]
        plot = sites[plot_name]
        ts = group_index[0]
        start, end = _make_day_ends(ts, timezone)

        # One EpochGroup per day
        if ts not in groups:
            group_name = "{}-{}-{}".format(start.getYear(),
                                           start.getMonthOfYear(),
                                           start.getDayOfMonth())
            print("Adding EpochGroup {}".format(group_name))
            # The two trailing None arguments are presumably the protocol
            # parameters and device parameters - confirm against the API.
            groups[ts] = container.insertEpochGroup(group_name, start,
                                                    protocol, None, None)

        epoch_group = groups[ts]

        # Epoch by site: map every input-source label to its epoch
        epochs = {}
        for epoch in iterable(epoch_group.getEpochs()):
            src_map = to_dict(epoch.getInputSources())
            for src in src_map.values():
                epochs[src.getLabel()] = epoch

        if plot_name not in epochs:
            print("Inserting Epoch for measurements at: {}".format(plot_name))
            epochs[plot_name] = epoch_group.insertEpoch(start, end, protocol,
                                                        None, None)

        epoch = epochs[plot_name]

        # Look the columns up once. Rows are addressed by position (.iloc)
        # throughout, matching the measurement slice below; plain [] with an
        # integer is label-based whenever the index holds integers.
        species_column = group['Species']
        observer_column = group['Observer']
        type_column = group['Type']

        for i in range(len(group)):
            species = species_column.iloc[i]
            observer = observer_column.iloc[i]
            print(" {}".format(species))

            # Tag the Source with the species found there. A failed attempt
            # is retried exactly once; a second failure propagates.
            try:
                plot.addTag(species)
            except JavaException:
                logging.error("Exception adding tag. Retrying...")
                plot.addTag(species)
                logging.info("Successfully added tag on second try")

            measurements = group.iloc[
                i, first_measurement_column_number:].dropna()

            # Rows whose type is neither SITE nor INDIVIDUAL only get the
            # species tag above; no measurement is inserted for them.
            row_type = type_column.iloc[i]
            if row_type == MEASUREMENT_TYPE_SITE:
                epoch.addInputSource(plot_name, plot)
                srcNames = Sets.newHashSet()
                srcNames.add(plot_name)
                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)
            elif row_type == MEASUREMENT_TYPE_INDIVIDUAL:
                # Each individual becomes a child source of the site,
                # labelled "<species> <row number>".
                individual = plot.insertSource(
                    epoch_group,
                    start,
                    end,
                    protocol,
                    Maps.newHashMap(),
                    Optional.absent(),
                    u"{} {}".format(species, i + 1),
                    u"{}-{}-{}-{}".format(species, plot_name,
                                          start.toString(), i + 1),
                )
                epoch.addInputSource(individual.getLabel(), individual)
                srcNames = Sets.newHashSet()
                srcNames.add(individual.getLabel())
                insert_measurements(epoch, group, i, measurements, plot_name,
                                    species, srcNames, start, observer)
                epoch.addTag('individual')

    return 0
def to_java_set(s):
    """Build a new hash set holding `box_number(element)` for each element of `s`.

    The set is created with `Sets.newHashSet()`; elements are converted and
    added one at a time, in iteration order of `s`.
    """
    java_set = Sets.newHashSet()
    # The generator keeps conversion and insertion interleaved per element.
    for boxed in (box_number(element) for element in s):
        java_set.add(boxed)
    return java_set