def load_file(self, fname, data):
    """Rebuild the population from a CSV dump written earlier.

    fname -- path to the CSV file; each row is one serialized scenario.
    data  -- dataset handed through to from_solution().
    """
    loaded = []
    skip = ('utility', 'rmse', 'cost')
    for row in CSVFileReader(fname).get_content():
        solution = []
        for key in (k for k in row.keys() if k not in skip):
            # month columns and variable columns differ only in the label key
            label = 'month' if key in months else 'variable'
            solution.append({label: key, 'value': row[key] == "True"})
        scenario = from_solution(solution, data)
        scenario.set_rmse(row['rmse'])
        scenario.set_utility(float(row['utility']))
        loaded.append(scenario)
    self._population = loaded
def parse_records(self, location, correct_records):
    """Read the harvest data file for *location* and return its rows.

    location        -- provides get_harvest_dataf() and get_name().
    correct_records -- when True, apply the site-specific fixes below.
    """
    content = CSVFileReader(location.get_harvest_dataf()).get_content()
    for entry in content:
        # NOTE(review): the return value of on_read() is discarded here, so
        # this only has an effect if on_read mutates the dict in place —
        # confirm against formats.on_read.
        entry = formats.on_read(entry, self._location)
    # perform corrections and fixes
    if correct_records:
        for entry in content:
            # fix that annoying problem in BSBEC 2011 where they split the
            # harvest
            if location.get_name() == Location.BSBEC and \
                    entry[formats.DATE] == datetime(2011, 7, 19):
                entry[formats.DATE] = datetime(2011, 7, 4)
            # fix entries without sub samples (2016 harvest, subsampling
            # was done only on the 12 plot harvest, as it is a more
            # accurate moisture measurement)
            if formats.DW_SUB not in entry.keys():
                # borrow the dry/fresh ratio from the pseudo-rep-0 record
                # of the same plant on the same date; raises StopIteration
                # if no such record exists
                match = next(x for x in content
                             if x[formats.DATE] == entry[formats.DATE] and
                             x[formats.UID] == entry[formats.UID] and
                             x[formats.PSEUDO_REP] == 0)
                ratio = match[formats.DW_SUB] / match[formats.FW_SUB]
                entry[formats.DW_PLANT] = entry[formats.FW_PLANT] * ratio
    return content
def read_directory(self, path):
    """Load transmission readings found under *path*.

    Dispatches on self._format: a single transmission.csv file, or a
    directory of .txt/.TXT files read via read_txt().
    """
    if self._format == SINGLE_FILE:
        csv_path = os.path.join(path, "transmission.csv")
        data = CSVFileReader(csv_path).get_content()
        for entry in data:
            # map the raw CSV columns onto the canonical format keys
            entry[formats.UID] = entry['Plot']
            entry[formats.PSEUDO_REP] = 0
            entry[formats.MEASUREMENT] = 0
            entry[formats.DATE] = "%s %s" % (entry['Day'], entry['Year'])
            entry[formats.TIME] = None
            for raw_key in ('Plot', 'Year', 'Day'):
                del entry[raw_key]
            entry = formats.on_read(entry, self._location, "%j %Y")
        return data
    elif self._format == MULTIPLE_FILES:
        # read all txt files in path
        pattern = re.compile(".*(TXT|txt)$")
        txt_files = [f for f in os.listdir(path) if pattern.match(f)]
        readings = []
        for name in txt_files:
            readings += self.read_txt(os.path.join(path, name))
        return readings
def load_content(self):
    """Read the FLL file for this location and attach a parsed 'Date'
    (built from the day-of-year and year columns) to every row."""
    entries = CSVFileReader(self._location.get_fll_location()).get_content()
    for row in entries:
        stamp = "%s %s" % (row['fll_day'], row['year'])
        row['Date'] = datetime.strptime(stamp, "%j %Y")
    self._content = entries
def get_data(self):
    """Return the transmission rows with 'Date', 'Day' and 'Transmission'
    coerced to datetime, int and float respectively."""
    rows = CSVFileReader(self._file_location).get_content()
    for row in rows:
        stamp = "%s %s" % (row['Day'], row['Year'])
        row['Date'] = datetime.strptime(stamp, "%j %Y")
        row['Day'] = int(row['Day'])
        row['Transmission'] = float(row['Transmission'])
    return rows
def _load_submodels(self, model_cache):
    """Loads meta.csv"""
    reader = CSVFileReader(os.path.join(model_cache, 'meta.csv'))
    content = reader.get_content()
    # the 'variables' column holds a Python list rendered to text; strip
    # the list punctuation in one pass, then split on single spaces
    punctuation = str.maketrans('', '', ",'[]")
    for entry in content:
        entry['variables'] = entry['variables'].translate(punctuation)
        entry['variables'] = entry['variables'].split(' ')
        entry['year'] = int(entry['year'])
    return content
def _cache_load(self):
    """Try to restore combined data from ~/.data_reader.cache.

    Returns True when the cache file existed and was loaded, else False.
    """
    cache_fname = os.path.join(os.environ.get("HOME"), ".data_reader.cache")
    if not os.path.isfile(cache_fname):
        return False
    self._combined_data = CSVFileReader(cache_fname).get_content()
    for entry in self._combined_data:
        entry = formats.on_read(entry, None)
    self._combined_data = self._fill_vals(self._combined_data)
    print("Loaded cache data from %s" % cache_fname)
    return True
def load_cm(self, fname):
    """Load the CM table from *fname*, converting the numeric columns and
    attaching the matching bound method to each row's 'function' key."""
    handlers = {
        "CM1": self._cm1,
        "CM2": self._cm2,
        "CM3": self._cm3,
        "CM4": self._cm4,
        "CM5": self._cm5,
        "CM6": self._cm6,
    }
    content = CSVFileReader(fname).get_content()
    for entry in content:
        entry['probability'] = float(entry['probability'])
        entry['success'] = float(entry['success'])
        # unknown method names simply get no 'function' key, as before
        method = entry['method']
        if method in handlers:
            entry['function'] = handlers[method]
    self._cm_functions = content
def __init__(self, location, t_base):
    """Read the met-data CSV for *location* and pre-parse every reading.

    location -- provides get_met_data(), the path to the met-data CSV.
    t_base   -- base temperature passed to degree_days().
    """
    self._met_data = CSVFileReader(location.get_met_data()).get_content()
    self._t_base = t_base
    for reading in self._met_data:
        # rename the raw 'Date' column to the canonical formats.DATE key
        reading[formats.DATE] = datetime.strptime(reading['Date'],
                                                  "%d/%m/%Y")
        reading.pop('Date')
        # NOTE(review): presumably parse_float passes "NA" through
        # unchanged so the clean-up loop below can null it — confirm
        # against parse_float
        reading[formats.PAR] = self.parse_float(reading[formats.PAR])
        reading[formats.T_MAX] = self.parse_float(reading[formats.T_MAX])
        reading[formats.T_MIN] = self.parse_float(reading[formats.T_MIN])
        reading[formats.RAINFALL] = \
            self.parse_float(reading[formats.RAINFALL])
        # degree-days derived from the (already parsed) min/max temps
        reading[formats.DD] = degree_days(reading[formats.T_MAX],
                                          reading[formats.T_MIN],
                                          self._t_base)
        # normalise any remaining "NA" strings to None
        for key in reading:
            if (reading[key] == "NA"):
                reading[key] = None
def calc_RUE(self, LER_dict, k_dict, location, LAI):
    """Estimate radiation-use efficiency (RUE) per genotype.

    Computes dry weights from the destructive phenotype records, groups
    harvests by date, simulates PAR via simulate_PAR(), then fits
    Yield ~ PAR (no intercept) in R to obtain one RUE value per genotype.
    Returns a list of {'Genotype': ..., 'RUE': ...} dicts.
    """
    met_data = MetDataReaderCSV(location).get_met_data()
    fll_reader = FLLReader(location)
    genotypes = set([x['Genotype'] for x in LER_dict])
    destructive_phenos = CSVFileReader(
        location.get_destr_phenos()).get_content()
    for entry in destructive_phenos:
        entry['Date'] = datetime.strptime(entry['Date'],
                                          "%Y-%m-%d %H:%M:%S UTC")
        try:
            entry['fresh'] = float(
                entry['Fresh weight above ground material(g)'])
            entry['fresh_sub'] = float(
                entry['Fresh weight above ground sub-sample(g)'])
            entry['dry_sub'] = float(
                entry['dry weight above ground sub-sample(g)'])
        except ValueError:
            # fresh weights unusable: fall back to the dry sub-sample
            # alone, or skip the record entirely if that is missing too
            try:
                entry['dry_weight'] = float(
                    entry['dry weight above ground sub-sample(g)'])
            except ValueError:
                pass
            continue
        if entry['fresh_sub'] == 0.0:
            # avoid division by zero; use the dry sub-sample directly
            entry['dry_weight'] = entry['dry_sub']
            continue
        # scale whole-plant fresh weight by the sub-sample dry/fresh ratio
        entry['dry_weight'] = entry['fresh'] * (entry['dry_sub'] /
                                                entry['fresh_sub'])
    # keep only records for which a dry weight could be computed
    destructive_phenos = [
        x for x in destructive_phenos if 'dry_weight' in x
    ]
    #run the simulation per genotype
    RUE = []
    for genotype in genotypes:
        geno_sub = [
            x for x in destructive_phenos if x['Genotype'] == genotype
        ]
        dates = list(set([x['Date'] for x in geno_sub]))
        dates.sort()
        #create data point groups by dates that are close
        #to each other or the same
        groups = []
        group_id = 0
        for date in dates:
            for group in groups:
                delta = group['Dates'][0] - date
                days = math.fabs(delta.days)
                # NOTE(review): `days and` excludes days == 0, so dates
                # less than one day apart start a NEW group rather than
                # joining — confirm this is intended
                if days and days < 20:
                    group['Dates'].append(date)
                    break
            else:
                #create new group (no existing group within 20 days)
                group = {'id': group_id, 'Dates': [date]}
                groups.append(group)
                group_id += 1
        #get the mean dry weight per group
        mean_DW = []
        #add entry for fll day
        fll_date = fll_reader.get_genotype_fll(genotype)
        mean_DW.append({'Date': fll_date, 'Yield': 0.0})
        for group in groups:
            group_phenos = [
                x for x in geno_sub if x['Date'] in group['Dates']
            ]
            total_dw = 0.0
            for entry in group_phenos:
                total_dw += entry['dry_weight']
            total_dw /= float(len(group_phenos))
            #correct the group date to the first one in the group
            mean_DW.append({
                'Date': sorted(group['Dates'])[0],
                'Yield': total_dw
            })
        #obtain genotype specific coefficients
        LER = [x for x in LER_dict if x['Genotype'] == genotype]
        LER.sort(key=lambda x: x['stage'])
        k = [x for x in k_dict if x['Genotype'] == genotype]
        if len(k) > 1:
            # several years available: pick the one matching the location
            k = next(x['k'] for x in k if x['Year'] == location.get_year())
        else:
            k = sorted(k, key=lambda x: x['Year'])[0]['k']
        #simulate PAR and record values for days of destructive harvests
        real_LAI = [x for x in LAI if x['Genotype'] == genotype]
        mean_DW = self.simulate_PAR(k, LER, met_data, fll_date, mean_DW,
                                    real_LAI)
        #finally work out what the RUE is from
        #the real DMY and simulated PAR values
        temp_file = tempfile.mkstemp()[1] + genotype.split("-")[0]
        CSVFileWriter(temp_file, mean_DW)
        # fit Yield ~ PAR with no intercept in R; Yield is doubled here —
        # presumably a unit conversion, TODO confirm the factor of 2
        robjects.r('''
            calc_RUE_r <- function(fname){
                data <- read.csv(fname)
                data$Yield <- data$Yield * 2
                fit <- lm(Yield ~ PAR + 0, data = data)
                return(summary(fit)$coefficients[1])
            }
        ''')
        calc_RUE_r = robjects.r("calc_RUE_r")
        RUE_val = calc_RUE_r(temp_file)[0]
        RUE.append({'Genotype': genotype, 'RUE': RUE_val})
    return RUE
def __init__(self, data, scenario, root_dir, load=False): """data - {'cd_data': [], 'ml_data': []} scenario - 'simple_ml' or 'process_ml' """ # hardcoded algorithm variables, could supply them to the # constructor if needed # self._PSize = 45 TODO real value self._PSize = 12 # weight for previous score entry, when updating the score table self._alpha = 0.3 # self._b = 20 TODO real value self._b = 8 self._proc_count = 4 # set class variables self._variables = [formats.STEM_COUNT, formats.CANOPY_HEIGHT, formats.TRANSMISSION, formats.FLOWERING_SCORE, formats.LEAF_AREA_INDEX, formats.COL, formats.ROW, formats.DD, formats.GENOTYPE, formats.RAINFALL, formats.DOY, formats.PAR] self._variables.sort() self._scenario = scenario self._root_dir = root_dir if scenario == "simple_ml": self._methods = ['rf', 'knn', 'gbm'] elif scenario == "compound": self._methods = ['NaiveMLProcessModelMemfix', 'GAWinModel'] else: raise Exception("STUB") # TODO self._data = self._hack_data(data) self._months = list(set([x[formats.DATE].strftime("%B") for x in self._data['ml_data']])) self._months.sort() # find maximum RMSE for methods self._max_rmse = self._get_max_rmse() # DB to contain all solutions ever explored self._database = ScatterPhenoScenarioContainer() self._score_table = self._empty_score_table() if load: sc_file = os.path.join(self._root_dir, 'score_table.csv') self._score_table = CSVFileReader(sc_file).get_content() for entry in self._score_table: entry['score'] = float(entry['score']) entry['value'] = (entry['value'] == "True") db_file = os.path.join(self._root_dir, 'database.csv') self._database.load_file(db_file, self._data) self._update_score_table() self._run_algorithm2() else: self._run_algorithm()
def parse_file(self, filename):
    """Read *filename* as CSV and pass the rows through parse_content()."""
    return self.parse_content(CSVFileReader(filename).get_content())