def run_experiment(self):
    """Run the search loop in its single-row debugging configuration.

    For every iteration and generation the current alpha is stepped down by
    ``self.alpha_scaling_factor``, the R2 table and the selector's fitness
    list are reset, and the population rows are evaluated.  Before each
    evaluation row 0 of the population matrix is overwritten with a fixed
    descriptor set, and the process exits right after the first row has
    been handled (debug "test area").

    NOTE(review): the ``self.feature_selector is None`` branch cannot be
    reached without an earlier attribute access on the selector failing
    first -- confirm whether running without a selector must be supported.
    """
    # Descriptor columns switched on for the single-model debug row.
    debug_descriptor_columns = (24, 188, 274, 302, 330, 344, 362)

    for iteration in range(1, VariableSetting.Iteration + 1):
        print("iteration loop", iteration)
        # Start one step above the initial alpha: every generation subtracts
        # one step before the value is used.
        self.current_alpha = (VariableSetting.Initial_alpha
                              + self.alpha_scaling_factor)

        for generation in range(1, VariableSetting.Generation + 1):
            print("generation loop", generation)
            self.current_alpha = self.current_alpha - self.alpha_scaling_factor
            # One row per population member; columns 0/1/2 are compared
            # against the train/valid/test thresholds below.
            self.population_r2_values = np.zeros(
                (VariableSetting.Population_Size, 3))
            self.feature_selector.fitness_matrix = []

            for population_idx in range(VariableSetting.Population_Size):
                # --- test area: force row 0 to a known descriptor set ---
                self.feature_selector.population_matrix[0] = np.zeros(
                    VariableSetting.No_of_Descriptors)
                for column in debug_descriptor_columns:
                    self.feature_selector.population_matrix[0][column] = 1

                self.feature_selector.current_population_index = population_idx
                if self.feature_selector is not None:
                    self.run_feature_selection()
                    data_inputs = self.data_manager.transformed_input
                else:
                    data_inputs = self.data_manager.inputs
                self.fit_and_evaluate_model(data_inputs)

                row_r2 = self.population_r2_values[population_idx]
                meets_thresholds = (
                    row_r2[2] >= VariableSetting.Required_r2_Test
                    and row_r2[1] >= VariableSetting.Required_r2_Valid
                    and row_r2[0] >= VariableSetting.Required_r2_Train
                )
                if meets_thresholds:
                    FileManager.write_model_in_file(
                        self.output_filename,
                        self.feature_selector.sel_descriptors_for_curr_population,
                        self.feature_selector.fitness_matrix[population_idx],
                        self.population_r2_values[population_idx][0],
                        self.population_r2_values[population_idx][1],
                        self.population_r2_values[population_idx][2],
                    )
                # --- test area: stop after the single debug row ---
                exit(0)

            self.feature_selector.local_best_matrix = (
                self.feature_selector.get_local_best_matrix())
            if generation == 1:
                self.feature_selector.initialize_local_best_fitness_for_first_generation()
            self.feature_selector.global_best_row = (
                self.feature_selector.get_global_row())
            self.feature_selector.find_next_velocity()
            self.feature_selector.generate_population_matrix(self.current_alpha)
            self.feature_selector.current_population_index = 0
            print("Global Row fitness",
                  self.feature_selector.global_best_row_fitness)
def read_data_and_set_variable_settings(self, data_file_path, variable_file_path):
    """Load the data file and register its dimensions in ``VariableSetting``.

    :param data_file_path: path handed to ``FileManager.load_file``
    :param variable_file_path: path handed to ``FileManager.load_variable_file``
    :return: the object returned by ``FileManager.load_file``
    """
    dataset = FileManager.load_file(data_file_path)
    drug_count = dataset.shape[0]
    # The last column is the y axis, so it does not count as a descriptor.
    descriptor_count = dataset.shape[1] - 1
    settings = FileManager.load_variable_file(variable_file_path)
    VariableSetting.set_variables(settings, drug_count, descriptor_count)
    return dataset
def read_data_and_set_variable_settings(self, data_file_path, variable_file_path):
    """Read the data set, then configure ``VariableSetting`` from it.

    The number of rows and the number of columns minus one (the last column
    is the y axis) are passed to ``VariableSetting.set_variables`` together
    with the content of the variable file.

    :param data_file_path: location of the data file
    :param variable_file_path: location of the variable-settings file
    :return: the loaded data, unchanged
    """
    table = FileManager.load_file(data_file_path)
    row_total = table.shape[0]
    feature_total = table.shape[1] - 1  # y column excluded
    VariableSetting.set_variables(
        FileManager.load_variable_file(variable_file_path),
        row_total,
        feature_total,
    )
    return table
def __init__(self):
    """
    init the creation date used as suffix for the filename
    init the FileManager used for reading the template and generate the xml
    init the template engine
    """
    # Read the clock once so month and year always describe the same
    # instant (two separate calls could straddle a year change).
    now = datetime.now()
    self.gen_date = "_" + str(now.month) + "_" + str(now.year)
    self.fm = FileManager()
    self.template = Template(
        self.fm.io("UserStorieTemplate", path="../assets/", extension=".xml"))
def run_experiment(self):
    """Run every iteration and generation of the search and log each model.

    Per generation the current alpha is lowered by
    ``self.alpha_scaling_factor``, the R2 table and the selector's fitness
    list are reset, and every population row is evaluated and written to the
    output file.  Afterwards the selector's local/global bests, velocity and
    population matrix are refreshed for the next generation.

    NOTE(review): the ``self.feature_selector is None`` branch cannot be
    reached without an earlier attribute access on the selector failing
    first -- confirm whether running without a selector must be supported.
    """
    last_iteration = VariableSetting.Iteration
    for iteration in range(1, last_iteration + 1):
        print("iteration loop", iteration)
        # Start one step above the initial alpha: every generation subtracts
        # one step before the value is used.
        self.current_alpha = (VariableSetting.Initial_alpha
                              + self.alpha_scaling_factor)

        for generation in range(1, VariableSetting.Generation + 1):
            print("generation loop", generation)
            self.current_alpha = self.current_alpha - self.alpha_scaling_factor
            r2_table_shape = (VariableSetting.Population_Size, 3)
            self.population_r2_values = np.zeros(r2_table_shape)
            self.feature_selector.fitness_matrix = []

            for population_idx in range(VariableSetting.Population_Size):
                self.feature_selector.current_population_index = population_idx
                if self.feature_selector is not None:
                    self.run_feature_selection()
                    data_inputs = self.data_manager.transformed_input
                else:
                    data_inputs = self.data_manager.inputs
                self.fit_and_evaluate_model(data_inputs)

                FileManager.write_model_in_file(
                    self.output_filename,
                    self.feature_selector.sel_descriptors_for_curr_population,
                    self.feature_selector.fitness_matrix[population_idx],
                    type(self.model),
                    self.population_r2_values[population_idx][0],
                    self.population_r2_values[population_idx][1],
                    self.population_r2_values[population_idx][2],
                )

            # Prepare the selector state for the next generation.
            self.feature_selector.local_best_matrix = (
                self.feature_selector.get_local_best_matrix())
            if generation == 1:
                self.feature_selector.initialize_local_best_fitness_for_first_generation()
            self.feature_selector.global_best_row = (
                self.feature_selector.get_global_row())
            self.feature_selector.find_next_velocity()
            self.feature_selector.generate_population_matrix(self.current_alpha)
            self.feature_selector.current_population_index = 0
            print("Global Row fitness",
                  self.feature_selector.global_best_row_fitness)
def updateExcel(self, args: dict) -> CommandHandlerResponse:
    """Update the Excel file named in ``args`` from the two CSV data files.

    The call is refused with ``ResponseInfo.TimeIntervalIsNotSufficient``
    when ``timeIntervalIsSufficient`` reports that it is too early;
    otherwise the last execution time is recorded before any work is done.

    :param args: request arguments; ``args[Request.filePath]`` is the Excel path
    :return: a ``CommandHandlerResponse`` carrying ``ResponseInfo.OK`` on
        success, or the response matching the caught exception
        (file not found, invalid data file, invalid link)
    """
    if not self.timeIntervalIsSufficient(Function.updateExcel):
        return CommandHandlerResponse(
            responseInfo=ResponseInfo.TimeIntervalIsNotSufficient)
    self.updateLastExecutionTime(Function.updateExcel)

    # TODO: take the data-file paths from ``args`` as well and report missing
    # arguments through a dedicated exception or CommandHandlerResponse.
    workbookPath = args[Request.filePath]
    diapasonsCsv = "../data/diapasonsDad.csv"
    firmsCsv = "../data/firms.csv"
    workbook = ExcelManager(workbookPath)

    try:
        diapasonRows = FileManager.readCSVto2DimensionalList(diapasonsCsv, " ")
        firmEntries = FileManager.readCSVtoDict(firmsCsv, ",")
        FileValidator.checkDataFilesForCorrectness(
            diapasonRows, 5, diapasonsCsv, firmEntries, 2, firmsCsv)
        workbook.updateExcel(diapasonRows, firmEntries)
        outcome = CommandHandlerResponse(
            responseInfo=ResponseInfo.OK,
            successMessage="\nExcel was updated successfully!")
    except FileNotFoundError as missingFile:
        return CommandHandlerResponse(
            responseInfo=ResponseInfo.FileNotFoundException,
            errorsMessage=str(missingFile))
    except InvalidDataFileException as badData:
        return CommandHandlerResponse(
            responseInfo=ResponseInfo.InvalidDataFileException,
            errorsMessage=str(badData))
    except InvalidLinkException as badLink:
        # The link failure is reported together with a partial-success message.
        return CommandHandlerResponse(
            responseInfo=ResponseInfo.InvalidLinkException,
            successMessage="\nExcel was partially updated.",
            errorsMessage=str(badLink))
    return outcome
def test_fit(self):
    """Fit DEBPSO on the training split using row 1 of a fresh population.

    Prints the row sum and the selected descriptors; nothing is asserted.
    """
    dataset_path = "../Dataset/00-91-Drugs-All-In-One-File.csv"
    dataset = FileManager.load_file(dataset_path)

    manager = DataManager(normalizer=None)
    manager.set_data(dataset)
    manager.split_data_into_train_valid_test_sets(test_split=0.15,
                                                  train_split=0.70)
    model = svm.SVR()

    # The first population is built from the first velocity matrix.
    first_velocity = Velocity().create_first_velocity()
    first_population = Population(velocity_matrix=first_velocity)
    first_population.create_first_population()

    selector = DEBPSO(first_population.population_matrix[1])
    train_inputs = manager.inputs[SplitTypes.Train]
    train_targets = manager.targets[SplitTypes.Train]
    selector.fit(train_inputs, train_targets)

    print("Population 1 row sum ", first_population.population_matrix[1].sum())
    print("Selected feature descriptors",
          selector.sel_descriptors_for_curr_population)
class UserStories:
    """
    Generate the User stories
    """

    def __init__(self):
        """
        init the creation date used as suffix for the filename
        init the FileManager used for reading the template and generate the xml
        init the template engine
        """
        # Read the clock once so month and year always describe the same
        # instant (two separate calls could straddle a year change).
        now = datetime.now()
        self.gen_date = "_" + str(now.month) + "_" + str(now.year)
        self.fm = FileManager()
        self.template = Template(
            self.fm.io("UserStorieTemplate", path="../assets/", extension=".xml"))

    @required([
        "StorieName", "CustomerType", "Need", "Description", "DoD", "TimeCharge"
    ])
    def gen_user_stories(self, stories_info):
        """
        This function generate an xml that has been filled with the stories_info dict passed as parameter
        using the template engine

        The values of ``stories_info`` are escaped in place before the
        template is filled, and the process exits when the template
        substitution raises a TypeError or ValueError.

        @param stories_info: a dict that contain all the filed needed to generate a user stories
        """
        # Escape the characters that would break the generated XML.  "&" has
        # to be handled first, otherwise the "&" of "&quot;" would be escaped
        # a second time.
        for key, info in list(stories_info.items()):
            info = info.replace("&", "&amp;")
            info = info.replace("\"", "&quot;")
            stories_info[key] = info

        print("Generating", stories_info.get("StorieName"))
        try:
            self.fm.io(
                stories_info.get("StorieName"),
                path="../xml/",
                extension=self.gen_date + ".xml",
                content=self.template.substitute(stories_info).encode('utf-8'))
        except (TypeError, ValueError) as err:
            print(err)
            sys.exit("[ERR] UserStories: a template error occurred")
def test1():
    """Print the smallest and largest tag counts of H and V images.

    Images whose ``type`` is ``'V'`` are counted as vertical, all others as
    horizontal.  Output order: min H, min V, max H, max V.
    """
    fm = FileManager()
    # data = fm.read_input("c_memorable_moments.txt")
    data = fm.read_input("b_lovely_landscapes.txt")

    vertical_sizes = []
    horizontal_sizes = []
    for image in data['images']:
        bucket = vertical_sizes if image['type'] == 'V' else horizontal_sizes
        bucket.append(len(image['tags']))

    # The defaults are the values reported when a category has no image.
    print(min(horizontal_sizes, default=999999999999))
    print(min(vertical_sizes, default=999999999999))
    print(max(horizontal_sizes, default=-1))
    print(max(vertical_sizes, default=-1))
def main():
    """Fetch the sprint tasks from Asana and generate the xml/svg diagram.

    The Asana key and the task id are read from the ``ASANA_KEY`` and
    ``TASK`` environment variables.

    @return: 0
    """
    asana = AsanaWrapper(os.getenv("ASANA_KEY"))
    task_ids = [os.getenv("TASK")]
    pld_json = asana.get_sprint_tasks(task_ids)

    generator = DiagramGenerator()
    dump(pld_json)
    generator.create_xml_tree("Terradia", pld_json)

    FileManager().generate_svg_from_xml()
    return 0
def test_file_manager(self):
    """Check the train/test data produced from ``../Datasets/test.data``."""
    loader = FileManager()
    loader.load_file("../Datasets/test.data")

    expected_train = [[2.0, 2.0, 2.0, 2.0, [0, 1, 0]]]
    self.assertEqual(loader.get_train_data(), expected_train)

    expected_test = [[1.0, 1.0, 1.0, 1.0, [1, 0, 0]]]
    self.assertEqual(loader.get_test_data(), expected_test)
def __init__(self, initial_population, generations):
    """
    A genetic algorithm is used to learn the weights and bias of a topology
    fixed network.

    :param initial_population: forwarded to the parent class initialiser
    :param generations: stored as ``generation_span``
    """
    super().__init__(initial_population)
    self.generation_span = generations
    self.precision = 0
    self.epoch = 0

    # Fixed topology: one layer of ``num_inputs`` neurons followed by layers
    # of 4 and 3 neurons.
    self.num_inputs = 4
    self.neurons_per_layer = [self.num_inputs, 4, 3]
    self.neural_network = NeuralNetwork(self.num_inputs)
    self.neural_network.buildFixed(self.neurons_per_layer)
    self.test_values = 20

    # Parse the data set into train and test parts.
    dataset_loader = FileManager()
    dataset_loader.load_file("../Datasets/iris.data")
    self.train_data = dataset_loader.get_train_data()
    self.test_data = dataset_loader.get_test_data()

    self.neurons_position = []
    self.x_plot = []
    self.y_plot = []
def main(args):
    """Print review counts and the rating (or predictions) for a user/item pair.

    :param args: parsed arguments providing ``arquivo`` (data file),
        ``usuario`` (user) and ``item``
    """
    # Read the information from the file.
    ratings_file = FileManager(args.arquivo)
    rs = RecommenderSystem(ratings_file)

    # Number of items rated by user X.
    print(rs.getUser(args.usuario).getReviewsLength())
    # Number of users who rated item Y.
    print(rs.getItem(args.item).getReviewsLength())

    if not rs.hasRating(args.usuario, args.item):
        # User X did not rate item Y: print pred(r_x,y) with the user-based
        # approach (Section 2.1.1), then with the item-based approach
        # (Section 2.2.1).
        print(rs.getUserBasedPrediction(args.usuario, args.item))
        print(rs.getItemBasedPrediction(args.usuario, args.item))
        return

    # User X rated item Y: print r_x,y.
    print(rs.getRating(args.usuario, args.item))
def main():
    """Load the iris data set, run the genetic algorithm and plot its results."""
    # Parse data set
    loader = FileManager()
    loader.load_file("../Datasets/iris.data")
    train_data = loader.get_train_data()
    test_data = loader.get_test_data()
    number_of_epochs = 2000
    # Training data can be shuffled
    # shuffle(train_data)

    # Genetic Algorithm (Task 3)
    # -------------------------------------------------
    genetic = GeneticFixedTopology(100, 1000)
    best_neural_network = genetic.run()
    genetic.plot_results()
def test_fit(self):
    """Run ``DEBPSO.fit`` on the training split and print what it selected.

    Row 1 of a freshly created first population is used as the DEBPSO input;
    the test only prints and does not assert anything.
    """
    csv_path = "../Dataset/00-91-Drugs-All-In-One-File.csv"
    raw_data = FileManager.load_file(csv_path)

    splitter = DataManager(normalizer=None)
    splitter.set_data(raw_data)
    splitter.split_data_into_train_valid_test_sets(test_split=0.15,
                                                   train_split=0.70)
    model = svm.SVR()

    velocity = Velocity()
    initial_velocity = velocity.create_first_velocity()

    # Define the first population from the initial velocity matrix.
    population = Population(velocity_matrix=initial_velocity)
    population.create_first_population()
    candidate_row = population.population_matrix[1]

    debpso = DEBPSO(candidate_row)
    debpso.fit(splitter.inputs[SplitTypes.Train],
               splitter.targets[SplitTypes.Train])

    print("Population 1 row sum ", population.population_matrix[1].sum())
    print("Selected feature descriptors",
          debpso.sel_descriptors_for_curr_population)
def read_csv(filename, header=0, sep=';'):
    """Wrap a ``FileManager`` for ``filename`` in a ``pdFakeFile``.

    :param filename: first argument passed to ``FileManager``
    :param header: second argument passed to ``FileManager`` (default 0)
    :param sep: third argument passed to ``FileManager`` (default ``';'``)
    :return: the ``pdFakeFile`` built around the file manager
    """
    return pdFakeFile(FileManager(filename, header, sep))
def build(self):
    """Create the file, then return a ``FileManager`` for ``self.filename``."""
    self.__createFile()
    manager = FileManager(self.filename)
    return manager
def test_clean_sex(users_sample):
    """The first row of the cleaned sample must have ``sex`` equal to "F"."""
    cleaned = FileManager._clean_sex(users_sample)
    first_sex = cleaned["sex"].iloc[0]
    assert first_sex == "F"
import numpy as np
import Bio

# Debug stop: print the installed Biopython version and exit right away.
# NOTE(review): everything below is unreachable while this exit(0) is in
# place -- confirm whether it is a leftover.
print(Bio.__version__)
exit(0)

from src.FileManager import FileManager

__author__ = 'FalguniT'

# Ehux JGI Fasta file
Ehux_JGI_file_path = "../data/Ehux_JGI.fasta"
Ehux_JGI_data = FileManager.load_file(Ehux_JGI_file_path)

# Geph Blast output file
Geph_file_path = "../data/Ehux_Geph_Blast_060916.txt.p1"
Geph_data = FileManager.load_file(Geph_file_path)
Geph_count = len(Geph_data)
print("geph count", Geph_count)

# ISO blast output file
ISO_file_path = "../data/Ehux_ISO_Blast_060916.txt.p1"
ISO_data = FileManager.load_file(ISO_file_path)
ISO_count = len(ISO_data)
print("ISO_ count", ISO_count)

# Strains 92A blast
strains_92A_file_path = "../data/Ehux_strains_92A_Blast_060816.txt.p1"
strains_92A_data = FileManager.load_file(strains_92A_file_path)
strains_92A_count = len(strains_92A_data)
def test2():
    """Run ``SlideShower.main`` on the ``c_memorable_moments.txt`` input."""
    reader = FileManager()
    # Alternative input: "b_lovely_landscapes.txt"
    slideshow_input = reader.read_input("c_memorable_moments.txt")
    SlideShower(slideshow_input).main()
class DiagramGenerator:
    """
    The DiagramGenerator regroup all the function that is needed to create the xml's and svg's files
    """

    def __init__(self):
        """
        init the creation date used as suffix for the filename,
        the user stories generator and the FileManager
        """
        # Read the clock once so month and year always describe the same
        # instant (two separate calls could straddle a year change).
        now = datetime.now()
        self.gen_date = "_" + str(now.month) + "_" + str(now.year)
        self.UserStorie = UserStories()
        self.fm = FileManager()

    @staticmethod
    def init_xml_tree(root_name):
        """
        Create the base xml element of a jgraph xml file format using the Page class that provide some base element,
        the root cell zone and the root cell itself

        @param root_name: the name if the diagram's root cell
        @return: The Page class where the base xml tree is stored
        """
        page = Page()
        page.root_group_cell = RootArea(page)
        cell = Cell(page, page.root_group_cell, root_name)
        page.root_group_cell.append_child(cell)
        page.deliverable_group_cell = DeliverableArea(page)
        return page

    def parse_storie_info(self, storie_name, storie_info):
        """
        Split a ";"-separated storie description into the dict used to generate a user storie

        @param storie_name: the name stored under the "StorieName" key
        @param storie_info: the ";"-separated text; its first five fields are used
        @return: the storie dict, or None when the text is empty or does not contain 5 to 7 fields
        """
        if storie_info == "":
            return None
        storie_info = storie_info.split(";")
        if len(storie_info) < 5 or len(storie_info) > 7:
            return None
        # Surrounding newlines, spaces, commas and semicolons are dropped
        # from every field.
        storie = {
            "StorieName": storie_name,
            "CustomerType": storie_info[0].strip('\n ,;'),
            "Need": storie_info[1].strip('\n ,;'),
            "Description": storie_info[2].strip('\n ,;'),
            "DoD": storie_info[3].strip('\n ,;'),
            "TimeCharge": storie_info[4].strip('\n ,;')
        }
        return storie

    def create_xml_tree(self, root_name, diagram_dict):
        """
        Loop through the diagram_dict param to create the xml tree
        Recursively called if another dict is found

        The resulting tree is written through the FileManager under ../xml/,
        and the FileManager is closed afterwards.

        @param root_name: the name if the diagram's root cell
        @param diagram_dict: the retrieved dictionary from asana used to create the xml tree
        """
        page = self.init_xml_tree(root_name)
        for deliverable, cards in sorted(diagram_dict.items()):
            cell = Cell(page, page.deliverable_group_cell, deliverable)
            page.deliverable_group_cell.append_child(cell)
            card_group = CardAreas(page)
            page.deliverable_group_cell.append_child(card_group)
            if isinstance(cards, dict):
                # A nested dict gets its own diagram and one cell per key here.
                self.create_xml_tree(deliverable, cards)
                for card in cards:
                    cell = Cell(page, card_group, card)
                    card_group.append_child(cell)
            if isinstance(cards, list):
                for card in cards:
                    cell = Cell(page, card_group, card["storie"], card["done"])
                    card_group.append_child(cell)
                    storie = self.parse_storie_info(card['storie'],
                                                    card["storie_info"])
                    # Cards without a usable description produce no user storie.
                    if storie:
                        self.UserStorie.gen_user_stories(storie)
        self.fm.io(root_name,
                   path="../xml/",
                   extension=self.gen_date + ".xml",
                   content=Et.tostring(page.tree, encoding="UTF-8"))
        self.fm.close()
def test_clean_real_estate(ads_sample):
    """The first four cleaned rows must all have category "real_estate"."""
    cleaned = FileManager._clean_real_estate(ads_sample)
    for row in range(4):
        assert cleaned["category"].iloc[row] == "real_estate"
from sklearn.preprocessing import MinMaxScaler
from src.Population import Population
from src.ReadData import ReadData
from src.SplitTypes import SplitTypes
from src.FileManager import FileManager
from src.DataManager import DataManager
from src.VariableSetting import VariableSetting
from src.Velocity import Velocity

# Load the data set; this also fills VariableSetting from the settings file.
read_data = ReadData()
loaded_data = read_data.read_data_and_set_variable_settings(
    "../Dataset/00-91-Drugs-All-In-One-File.csv",
    "../Dataset/VariableSetting.csv",
)
output_filename = FileManager.create_output_file()

# No normalizer is used at the moment (ZeroOneMinMaxNormalizer and
# MinMaxScaler were tried here before).
normalizer = None
data_manager = DataManager(normalizer=normalizer)
data_manager.set_data(loaded_data)
data_manager.split_data_into_train_valid_test_sets()

# Set the feature selection algorithm based on the variable settings; it
# stays None when the configured name matches neither option.
selected_algorithm_name = VariableSetting.Feature_Selection_Algorithm
feature_selection_algo = None
if selected_algorithm_name == 'DEBPSO':
    feature_selection_algo = DEBPSO()
if selected_algorithm_name == 'LinearSVC':
    feature_selection_algo = LinearSVC()
from src.FileManager import FileManager
from src.DataManager import DataManager
from src.Population import Population
from src.Normalizer import *

no_of_populations = 50  # should be 50 population
no_of_descriptors = 385  # should be 385 descriptors
unfit = 1000

# R2 value required for each data split.
required_r2 = {
    SplitTypes.Train: .6,
    SplitTypes.Valid: .5,
    SplitTypes.Test: .5,
}

file_path = "../Dataset/00-91-Drugs-All-In-One-File.csv"
loaded_data = FileManager.load_file(file_path)
output_filename = FileManager.create_output_file()

# No normalizer is used at the moment (RescalingNormalizer and
# ScikitNormalizer were tried here before).
data_manager = DataManager(normalizer=None)
data_manager.set_data(loaded_data)
data_manager.split_data(test_split=0.15, train_split=0.70)

model = svm.SVR()

population = Population()
population.load_data()
from CustomExceptions.InvalidDataFileException import InvalidDataFileException from CustomExceptions.InvalidLinkException import InvalidLinkException from src.FileManager import FileManager from src.ExcelManager import ExcelManager from src.FileValidator import FileValidator excelPath = "hello_world.xlsx" diapasonsPath = "../data/diapasons.csv" firmsPath = "../data/firms.csv" excelManager = ExcelManager(excelPath) diapasons = FileManager.readCSVto2DimensionalList(diapasonsPath, " ") firms = FileManager.readCSVtoDict(firmsPath, ",") # for key, value in firms: # print("Firms:\n========================\n") # print(key, " : ", value) # for key in firms.keys(): # print(key, " : ", firms[key]) # print("Firms:\n========================\n", firms) # print(FileManager.readCSVtoDict(firms, ",")) try: FileValidator.checkDataFilesForCorrectness(diapasons, 5, diapasonsPath, firms, 2, firmsPath) print("Data files was loaded successfully.") except InvalidDataFileException as e: print("Exceptions during loading the data files: ", e) try:
if __name__ == "__main__":
    # Command line: the optional --report flag enables the report step.
    parser = argparse.ArgumentParser()
    parser.add_argument("--report", action="store_true")
    args = parser.parse_args()
    report = args.report

    # Database settings come from the environment (after loading the .env
    # file); each POSTGRES_* variable falls back to its DB_* counterpart.
    load_dotenv()
    db_user = os.getenv("POSTGRES_USER", os.getenv("DB_USER"))
    db_pwd = os.getenv("POSTGRES_PASSWORD", os.getenv("DB_PWD"))
    db_name = os.getenv("POSTGRES_DB", os.getenv("DB_NAME"))
    provider = os.getenv("PROVIDER", "postgresql")
    # NOTE(review): this key is lowercase while every other one is
    # uppercase -- confirm that "port" is the intended variable name.
    port = os.getenv("port", "5432")

    # Cleaning & inserting
    fm = FileManager(db_user, db_pwd, db_name, provider, port)
    users = fm.clean_users()
    ads = fm.clean_ads()
    referrals = fm.clean_referrals()
    ads_transaction = fm.clean_ads_transaction()

    # Report
    if report:
        rm = ReportManager(
            users=users,
            ads=ads,
            referrals=referrals,
            ads_transaction=ads_transaction,
        )
        rm.process()
def __init__(self):
    """
    init the creation date used as suffix for the filename,
    the user stories generator and the FileManager
    """
    # Read the clock once so month and year always describe the same
    # instant (two separate calls could straddle a year change).
    now = datetime.now()
    self.gen_date = "_" + str(now.month) + "_" + str(now.year)
    self.UserStorie = UserStories()
    self.fm = FileManager()
import sys
import tkinter as tk
from typing import List

from src.load import config
from src.FileManager import FileManager
from src.StateManager import StateManager

if __name__ == '__main__':
    # The data file is given as the first command-line argument.
    filename = sys.argv[1]
    fm = FileManager(filename, config['rows'], config['columns'])
    fm.read()

    if fm.is_data_corrupt():
        decision = input('data file is corrupt, fix it automatically? (y/n): ')
        if decision not in ('y', 'Y'):
            sys.exit()
        fm.fix_data()
        # NOTE(review): the original layout of this script was lost; confirm
        # that the write belongs inside the corrupt-data branch.
        fm.write()

    app = tk.Tk()
    sm = StateManager(
        fm.data,
        fm.rows,
        fm.columns,
        config['pixel_on_hex_color'],
        config['pixel_off_hex_color'],
    )

    # Header row holding the "Save" button, which writes the data file.
    header_section0 = tk.Frame(app)
    save_button = tk.Button(
        header_section0,
        text='Save',
        command=fm.write,
        highlightbackground=config['save_button_color'],
    )
    save_button.pack()
    header_section0.pack()