def load_features(self):
    (self.sites, self.site_names, self.features, self.feature_names,
     self.state_names, self.states, self.families, self.family_names,
     self.log_load_features) = read_features_from_csv(
        file=self.config['data']['features'],
        feature_states_file=self.config['data']['feature_states'])
    self.network = compute_network(self.sites, crs=self.crs)
def read_data(self):
    print('Reading input data...')
    if self.is_simulation:
        self.sites, self.site_names, _ = read_sites(self.path_data)
    else:
        self.sites, self.site_names, _, _, _, _, self.families, self.family_names, _ = \
            read_features_from_csv(self.path_data, self.path_feature_states)
    self.network = compute_network(self.sites)
    self.locations, self.dist_mat = self.network['locations'], self.network['dist_mat']
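# --- Illustration only (not part of the original module) ---------------------
# A minimal sketch of the network structure that read_data() expects from
# compute_network(): a dict with at least a 'locations' array (n_sites x 2)
# and a pairwise 'dist_mat' (n_sites x n_sites). The implementation below is
# an assumption for illustration, not the actual compute_network() code.
import numpy as np
from scipy.spatial.distance import pdist, squareform

def sketch_compute_network(locations: np.ndarray) -> dict:
    """Build a toy network dict from an (n_sites, 2) coordinate array."""
    return {
        'locations': locations,
        'dist_mat': squareform(pdist(locations)),  # Euclidean distance between all site pairs
    }

# Usage sketch:
# net = sketch_compute_network(np.array([[0.0, 0.0], [1.0, 2.0], [3.0, 1.0]]))
# net['dist_mat'].shape  # -> (3, 3)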
def load_features(self):
    # Collect the feature (parameter) IDs from the CLDF ParameterTable
    self.features = []
    c_id = self.ds["ParameterTable", 'id'].name
    for feature in self.ds["ParameterTable"]:
        self.features.append(feature[c_id])
    # NOTE: placeholder feature matrix; the actual feature values are not read from the dataset here
    self.features = numpy.array([[1, 2]])

    # Build the Sites object from the CLDF LanguageTable (ID, coordinates, name)
    c_id = self.ds["LanguageTable", 'id'].name
    c_name = self.ds["LanguageTable", 'name'].name
    c_lon = self.ds["LanguageTable", 'longitude'].name
    c_lat = self.ds["LanguageTable", 'latitude'].name
    self.sites = Sites(*zip(*[(site[c_id], (site[c_lon], site[c_lat]), site[c_name])
                              for site in self.ds["LanguageTable"]]))
    self.network = compute_network(self.sites)
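# --- Illustration only (not part of the original module) ---------------------
# The placeholder matrix above contains no real data. A hedged sketch of how
# per-language feature values could be collected from the CLDF ValueTable,
# mirroring the column-lookup pattern used for ParameterTable and LanguageTable
# above. `build_feature_matrix` is a hypothetical helper name.
def build_feature_matrix(ds, language_ids, feature_ids):
    c_lang = ds["ValueTable", 'languageReference'].name
    c_param = ds["ValueTable", 'parameterReference'].name
    c_value = ds["ValueTable", 'value'].name
    values = {(row[c_lang], row[c_param]): row[c_value] for row in ds["ValueTable"]}
    # One row per language, one column per feature; None where no value is coded
    return [[values.get((lang, feat)) for feat in feature_ids] for lang in language_ids]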
def run_simulation(self):
    self.inheritance = self.config['INHERITANCE']
    self.subset = self.config['SUBSET']

    # Get sites
    self.sites, self.site_names, self.log_read_sites = \
        read_sites(file=self.sites_file,
                   retrieve_family=self.inheritance,
                   retrieve_subset=self.subset)
    self.network = compute_network(self.sites)

    # Assign the simulated areas (needed below to simulate probabilities and features)
    self.areas = assign_area(area_id=self.config['AREA'], sites_sim=self.sites)

    # Simulate families
    if self.inheritance:
        self.families, self.family_names = assign_family(fam_id=1, sites_sim=self.sites)
    else:
        self.families = None

    # Simulate weights, i.e. the influence of universal pressure, contact and inheritance on each feature
    self.weights = simulate_weights(i_universal=self.config['I_UNIVERSAL'],
                                    i_contact=self.config['I_CONTACT'],
                                    i_inheritance=self.config['I_INHERITANCE'],
                                    inheritance=self.inheritance,
                                    n_features=self.config['N_FEATURES'])

    attempts = 0
    while True:
        attempts += 1
        # Simulate probabilities for features to be universally preferred,
        # passed through contact (and inherited, if available)
        self.p_universal, self.p_contact, self.p_inheritance = \
            simulate_assignment_probabilities(e_universal=self.config['E_UNIVERSAL'],
                                              e_contact=self.config['E_CONTACT'],
                                              e_inheritance=self.config['E_INHERITANCE'],
                                              inheritance=self.inheritance,
                                              n_features=self.config['N_FEATURES'],
                                              p_number_categories=self.config['P_N_CATEGORIES'],
                                              areas=self.areas,
                                              families=self.families)
        correlated = assess_correlation_probabilities(self.p_universal, self.p_contact,
                                                      self.p_inheritance, corr_th=self.corr_th)
        if correlated <= self.n_correlated:
            break
        if attempts > 10000:
            # Too many rejected draws: relax the correlation criteria and keep trying
            attempts = 0
            self.corr_th += 0.05
            self.n_correlated += 1
            print("Correlation threshold for simulation increased to", self.corr_th)
            print("Number of allowed correlated features increased to", self.n_correlated)

    # Simulate features
    self.features, self.states, self.feature_names, self.state_names = \
        simulate_features(areas=self.areas,
                          families=self.families,
                          p_universal=self.p_universal,
                          p_contact=self.p_contact,
                          p_inheritance=self.p_inheritance,
                          weights=self.weights,
                          inheritance=self.inheritance)

    if self.subset:
        # The data is split into two parts: the subset is used for the analysis,
        # the complement defines the prior counts.
        counts = counts_from_complement(features=self.features, subset=self.sites['subset'])
        self.prior_universal = {'counts': counts, 'states': self.states}

        self.network = compute_network(sites=self.sites, subset=self.sites['subset'])

        sub_idx = np.nonzero(self.sites['subset'])[0]
        self.areas = self.areas[np.newaxis, 0, sub_idx]

        self.features = subset_features(features=self.features, subset=self.sites['subset'])
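# --- Illustration only (not part of the original module) ---------------------
# A minimal sketch of the simulation config consumed by run_simulation() above.
# Only the keys are taken from the code; the values are made-up examples and
# their exact types/ranges depend on the simulate_* functions.
EXAMPLE_SIMULATION_CONFIG = {
    'INHERITANCE': True,        # simulate families and inheritance?
    'SUBSET': False,            # split data into analysis subset and prior complement?
    'AREA': 1,                  # which predefined area to assign to the sites
    'N_FEATURES': 30,           # number of simulated features
    'P_N_CATEGORIES': {'2': 0.5, '3': 0.5},  # distribution over the number of states per feature (illustrative)
    'I_UNIVERSAL': 1.0,         # intensity of universal pressure
    'I_CONTACT': 2.0,           # intensity of contact
    'I_INHERITANCE': 1.0,       # intensity of inheritance
    'E_UNIVERSAL': 1.0,         # concentration of universal preference probabilities
    'E_CONTACT': 2.0,           # concentration of contact probabilities
    'E_INHERITANCE': 1.0,       # concentration of inheritance probabilities
}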