def load_file(self, fname, data):
        """Load a population of scenarios from the CSV file *fname*.

        Every row becomes one scenario: each column except the score
        columns ('utility', 'rmse', 'cost') is turned into an entry dict
        keyed either 'month' (when the column is a known month) or
        'variable', with a boolean 'value'.  The scenario is then built
        with from_solution() and tagged with its rmse/utility scores.
        """
        rows = CSVFileReader(fname).get_content()
        scenarios = []
        for row in rows:
            scenario = []
            for key in row.keys():
                if key in ['utility', 'rmse', 'cost']:
                    continue  # score columns, not solution variables

                # both kinds of entry carry the same boolean value; only
                # the label differs (was duplicated across two branches)
                label = 'month' if key in months else 'variable'
                scenario.append({label: key, 'value': row[key] == "True"})

            final_scenario = from_solution(scenario, data)
            # NOTE(review): rmse is passed as the raw CSV string while
            # utility is converted to float -- confirm set_rmse accepts str
            final_scenario.set_rmse(row['rmse'])
            final_scenario.set_utility(float(row['utility']))
            scenarios.append(final_scenario)

        self._population = scenarios
    def parse_records(self, location, correct_records):
        """Read the harvest data file for *location* and return its rows.

        location -- site object providing the harvest data file path and
            the site name
        correct_records -- when True, apply known per-site data fixes

        NOTE(review): formats.on_read() is given self._location while the
        BSBEC check uses the *location* argument -- confirm these refer to
        the same site.
        """
        content = CSVFileReader(location.get_harvest_dataf()).get_content()
        for entry in content:
            # presumably on_read mutates entry in place; the returned
            # binding is discarded -- TODO confirm
            entry = formats.on_read(entry, self._location)

        # perform corrections and fixes
        if correct_records:
            for entry in content:
                # fix that annoying problem in BSBEC 2011 where they split the
                # harvest
                if location.get_name() == Location.BSBEC and \
                        entry[formats.DATE] == datetime(2011, 7, 19):
                    entry[formats.DATE] = datetime(2011, 7, 4)

                # fix entries without sub samples (2016 harvest, subsampling
                # was done only on the 12 plot harvest, as it is a more
                # accurate moisture measurement)
                if formats.DW_SUB not in entry.keys():
                    # find the matching pseudo-rep-0 record for the same
                    # date/UID; raises StopIteration if none exists
                    match = next(x for x in content
                                 if x[formats.DATE] == entry[formats.DATE]
                                 and x[formats.UID] == entry[formats.UID]
                                 and x[formats.PSEUDO_REP] == 0)

                    # scale the plant fresh weight by the sub-sample
                    # dry/fresh ratio to estimate the plant dry weight
                    ratio = match[formats.DW_SUB] / match[formats.FW_SUB]
                    entry[formats.DW_PLANT] = entry[formats.FW_PLANT] * ratio

        return content
# Example #3
    def read_directory(self, path):
        """Read transmission records from *path* in the configured format.

        SINGLE_FILE reads path/transmission.csv and normalises its raw
        columns onto the shared formats keys; MULTIPLE_FILES concatenates
        the content of every .txt/.TXT file in the directory.
        """
        if self._format == SINGLE_FILE:
            csv_path = os.path.join(path, "transmission.csv")
            records = CSVFileReader(csv_path).get_content()

            for record in records:
                # map the raw CSV columns onto the shared format keys
                record[formats.UID] = record['Plot']
                record[formats.PSEUDO_REP] = 0
                record[formats.MEASUREMENT] = 0
                record[formats.DATE] = "%s %s" % (record['Day'],
                                                  record['Year'])
                record[formats.TIME] = None

                # drop the raw columns now that they are remapped
                for raw_key in ('Plot', 'Year', 'Day'):
                    del record[raw_key]

                record = formats.on_read(record, self._location, "%j %Y")

            return records

        elif self._format == MULTIPLE_FILES:
            # read all txt files in path
            txt_pattern = re.compile(".*(TXT|txt)$")
            records = []
            for fname in filter(txt_pattern.match, os.listdir(path)):
                records += self.read_txt(os.path.join(path, fname))

            return records
# Example #4
    def load_content(self):
        """Read the FLL file and attach a parsed 'Date' to every row.

        'fll_day' (day of year) and 'year' are combined into a datetime
        stored under the 'Date' key.
        """
        rows = CSVFileReader(self._location.get_fll_location()).get_content()
        for row in rows:
            # day-of-year + year -> datetime
            stamp = "%s %s" % (row['fll_day'], row['year'])
            row['Date'] = datetime.strptime(stamp, "%j %Y")

        self._content = rows
    def get_data(self):
        """Return the transmission rows with typed Date/Day/Transmission.

        'Day' (day of year) and 'Year' are combined into a datetime under
        'Date'; 'Day' and 'Transmission' are converted to int and float.
        """
        rows = CSVFileReader(self._file_location).get_content()
        for row in rows:
            stamp = "%s %s" % (row['Day'], row['Year'])
            row['Date'] = datetime.strptime(stamp, "%j %Y")
            row['Day'] = int(row['Day'])
            row['Transmission'] = float(row['Transmission'])

        return rows
# Example #6
    def _load_submodels(self, model_cache):
        """Load meta.csv from *model_cache* and return its parsed rows.

        The 'variables' column holds a stringified Python list (e.g.
        "['a', 'b']"); it is reduced to a plain list of names.  The
        'year' column is converted to int.
        """
        reader = CSVFileReader(os.path.join(model_cache, 'meta.csv'))
        content = reader.get_content()

        # strip list punctuation in a single C-level pass instead of
        # four chained .replace() calls; hoisted out of the loop
        strip_list_chars = str.maketrans('', '', ",'[]")
        for entry in content:
            entry['variables'] = entry['variables'].translate(
                strip_list_chars).split(' ')
            entry['year'] = int(entry['year'])

        return content
# Example #7
    def _cache_load(self):
        """Load combined data from the user's cache file if present.

        Returns True when ~/.data_reader.cache was found and loaded into
        self._combined_data, False otherwise.
        """
        # expanduser degrades gracefully when $HOME is unset, whereas
        # os.environ.get("HOME") would return None and crash path.join()
        cache_fname = os.path.join(os.path.expanduser("~"),
                                   ".data_reader.cache")

        if not os.path.isfile(cache_fname):
            return False

        self._combined_data = CSVFileReader(cache_fname).get_content()
        for entry in self._combined_data:
            # presumably on_read mutates entry in place; the returned
            # binding is discarded -- TODO confirm
            entry = formats.on_read(entry, None)

        self._combined_data = self._fill_vals(self._combined_data)

        print("Loaded cache data from %s" % cache_fname)
        return True
    def load_cm(self, fname):
        """Load counting-method rows from the CSV file *fname*.

        'probability' and 'success' are converted to float, and the known
        method names CM1..CM6 are resolved to their bound implementations
        under the 'function' key.
        """
        # dispatch table replaces the original if/elif chain
        cm_dispatch = {
            "CM1": self._cm1,
            "CM2": self._cm2,
            "CM3": self._cm3,
            "CM4": self._cm4,
            "CM5": self._cm5,
            "CM6": self._cm6,
        }

        content = CSVFileReader(fname).get_content()
        for entry in content:
            entry['probability'] = float(entry['probability'])
            entry['success'] = float(entry['success'])
            # unknown method names get no 'function' key, as before
            if entry['method'] in cm_dispatch:
                entry['function'] = cm_dispatch[entry['method']]

        self._cm_functions = content
    def __init__(self, location, t_base):
        """Load met data for *location* and pre-compute degree days.

        location -- site object providing the met data file path
        t_base -- base temperature for the degree-day calculation
        """
        self._met_data = CSVFileReader(location.get_met_data()).get_content()
        self._t_base = t_base

        for reading in self._met_data:
            # replace the raw 'Date' string with a parsed datetime stored
            # under the shared formats.DATE key
            reading[formats.DATE] = datetime.strptime(reading['Date'],
                                                      "%d/%m/%Y")
            reading.pop('Date')

            reading[formats.PAR] = self.parse_float(reading[formats.PAR])
            reading[formats.T_MAX] = self.parse_float(reading[formats.T_MAX])
            reading[formats.T_MIN] = self.parse_float(reading[formats.T_MIN])
            reading[formats.RAINFALL] = \
                self.parse_float(reading[formats.RAINFALL])

            reading[formats.DD] = degree_days(reading[formats.T_MAX],
                                              reading[formats.T_MIN],
                                              self._t_base)

            # NOTE(review): this "NA" -> None pass runs after parse_float
            # and degree_days above -- confirm those tolerate "NA" inputs
            # for the columns they touch
            for key in reading:
                if (reading[key] == "NA"):
                    reading[key] = None
# Example #10
    def calc_RUE(self, LER_dict, k_dict, location, LAI):
        """Estimate radiation use efficiency (RUE) per genotype.

        Computes dry weights from destructive harvest phenotypes, groups
        them by harvest date, simulates intercepted PAR, then fits (in R,
        via rpy2) a zero-intercept linear model Yield ~ PAR whose slope is
        the RUE.

        LER_dict -- per-genotype/per-stage leaf expansion records
        k_dict -- per-genotype (optionally per-year) extinction coefficients
        location -- site object providing met data and phenotype files
        LAI -- measured leaf area index records

        Returns a list of {'Genotype': ..., 'RUE': ...} dicts.
        """
        met_data = MetDataReaderCSV(location).get_met_data()
        fll_reader = FLLReader(location)
        genotypes = set([x['Genotype'] for x in LER_dict])

        destructive_phenos = CSVFileReader(
            location.get_destr_phenos()).get_content()
        for entry in destructive_phenos:
            entry['Date'] = datetime.strptime(entry['Date'],
                                              "%Y-%m-%d %H:%M:%S UTC")

            # derive dry weight: prefer fresh weight scaled by the
            # sub-sample dry/fresh ratio; fall back to the dry sub-sample
            # alone when the fresh columns are not numeric
            try:
                entry['fresh'] = float(
                    entry['Fresh weight above ground material(g)'])
                entry['fresh_sub'] = float(
                    entry['Fresh weight above ground  sub-sample(g)'])
                entry['dry_sub'] = float(
                    entry['dry weight above ground sub-sample(g)'])
            except ValueError:
                try:
                    entry['dry_weight'] = float(
                        entry['dry weight above ground sub-sample(g)'])
                except ValueError:
                    pass
                continue

            # avoid division by zero when no fresh sub-sample was taken
            if entry['fresh_sub'] == 0.0:
                entry['dry_weight'] = entry['dry_sub']
                continue
            entry['dry_weight'] = entry['fresh'] * (entry['dry_sub'] /
                                                    entry['fresh_sub'])

        # keep only records for which a dry weight could be derived
        destructive_phenos = [
            x for x in destructive_phenos if 'dry_weight' in x
        ]

        #run the simulation per genotype
        RUE = []
        for genotype in genotypes:
            geno_sub = [
                x for x in destructive_phenos if x['Genotype'] == genotype
            ]
            dates = list(set([x['Date'] for x in geno_sub]))
            dates.sort()

            #create data point groups by dates that are close
            #to each other or the same
            groups = []
            group_id = 0
            for date in dates:
                for group in groups:
                    delta = group['Dates'][0] - date
                    days = math.fabs(delta.days)
                    # NOTE(review): dates are de-duplicated above, so
                    # days == 0 should be unreachable here -- confirm
                    if days and days < 20:
                        group['Dates'].append(date)
                        break
                else:
                    #create new group
                    group = {'id': group_id, 'Dates': [date]}
                    groups.append(group)
                    group_id += 1

            #get the mean dry weight per group
            mean_DW = []
            #add entry for fll day
            fll_date = fll_reader.get_genotype_fll(genotype)
            mean_DW.append({'Date': fll_date, 'Yield': 0.0})

            for group in groups:
                group_phenos = [
                    x for x in geno_sub if x['Date'] in group['Dates']
                ]
                total_dw = 0.0
                for entry in group_phenos:
                    total_dw += entry['dry_weight']

                total_dw /= float(len(group_phenos))

                #correct the group date to the first one in the group
                mean_DW.append({
                    'Date': sorted(group['Dates'])[0],
                    'Yield': total_dw
                })

            #obtain genotype specific coefficients
            LER = [x for x in LER_dict if x['Genotype'] == genotype]
            LER.sort(key=lambda x: x['stage'])
            k = [x for x in k_dict if x['Genotype'] == genotype]
            if len(k) > 1:
                k = next(x['k'] for x in k if x['Year'] == location.get_year())
            else:
                k = sorted(k, key=lambda x: x['Year'])[0]['k']

            #simulate PAR and record values for days of destructive harvests
            real_LAI = [x for x in LAI if x['Genotype'] == genotype]
            mean_DW = self.simulate_PAR(k, LER, met_data, fll_date, mean_DW,
                                        real_LAI)

            #finally work out what the RUE is from
            #the real DMY and simulated PAR values
            # write the per-genotype data to a temp CSV so R can read it;
            # the R model is Yield ~ PAR with no intercept, so the single
            # fitted coefficient is the RUE slope
            temp_file = tempfile.mkstemp()[1] + genotype.split("-")[0]
            CSVFileWriter(temp_file, mean_DW)
            robjects.r('''
				calc_RUE_r <- function(fname){
					data <- read.csv(fname)
					data$Yield <- data$Yield * 2
					fit <- lm(Yield ~ PAR + 0, data = data)
					return(summary(fit)$coefficients[1])
				}
				''')
            calc_RUE_r = robjects.r("calc_RUE_r")
            RUE_val = calc_RUE_r(temp_file)[0]
            RUE.append({'Genotype': genotype, 'RUE': RUE_val})

        return RUE
# Example #11
    def __init__(self, data, scenario, root_dir, load=False):
        """Configure and immediately run the search algorithm.

        data - {'cd_data': [], 'ml_data': []}
        scenario - 'simple_ml' or 'process_ml'
        root_dir - directory holding score_table.csv / database.csv
        load - when True, resume from the CSV state files in root_dir
            and run algorithm 2; otherwise run the algorithm from scratch
        """

        # hardcoded algorithm variables, could supply them to the
        # constructor if needed
        # self._PSize = 45 TODO real value
        self._PSize = 12

        # weight for previous score entry, when updating the score table
        self._alpha = 0.3

        # self._b = 20 TODO real value
        self._b = 8

        # number of worker processes used by the algorithm
        self._proc_count = 4

        # set class variables
        self._variables = [formats.STEM_COUNT, formats.CANOPY_HEIGHT,
                           formats.TRANSMISSION, formats.FLOWERING_SCORE,
                           formats.LEAF_AREA_INDEX, formats.COL,
                           formats.ROW, formats.DD, formats.GENOTYPE,
                           formats.RAINFALL, formats.DOY, formats.PAR]
        self._variables.sort()

        self._scenario = scenario
        self._root_dir = root_dir
        # NOTE(review): the docstring mentions 'process_ml' but only
        # 'simple_ml' and 'compound' are handled -- confirm intent
        if scenario == "simple_ml":
            self._methods = ['rf', 'knn', 'gbm']
        elif scenario == "compound":
            self._methods = ['NaiveMLProcessModelMemfix', 'GAWinModel']
        else:
            raise Exception("STUB")  # TODO

        self._data = self._hack_data(data)

        # distinct month names present in the ML data, e.g. 'April'
        self._months = list(set([x[formats.DATE].strftime("%B") for x in
                                self._data['ml_data']]))
        self._months.sort()

        # find maximum RMSE for methods
        self._max_rmse = self._get_max_rmse()

        # DB to contain all solutions ever explored
        self._database = ScatterPhenoScenarioContainer()
        self._score_table = self._empty_score_table()
        if load:
            # restore score table (typed) and solution database from disk
            sc_file = os.path.join(self._root_dir, 'score_table.csv')
            self._score_table = CSVFileReader(sc_file).get_content()
            for entry in self._score_table:
                entry['score'] = float(entry['score'])
                entry['value'] = (entry['value'] == "True")

            db_file = os.path.join(self._root_dir, 'database.csv')
            self._database.load_file(db_file, self._data)
            self._update_score_table()

            self._run_algorithm2()
        else:
            self._run_algorithm()
# Example #12
 def parse_file(self, filename):
     """Read *filename* as CSV and return its parsed content."""
     return self.parse_content(CSVFileReader(filename).get_content())