def on_click(self):
    """
    'Load csv file' button's on_click callback listener. It does three things:
        1. Get the filename of the csv file
        2. Load the csv file and convert the data into the grid data for the Sudoku solver class
        3. Open a pygame window to show the simulation of solving the puzzle
    """
    init_dir = './Data'
    name = 'Choose your file'
    filetype = (("csv files", "*.csv"),)  # sequence of (label, pattern) pairs
    self.get_filename(init_dir, name, filetype)
    self.grid = utility.load_csv(self.filename)
    while len(self.grid) != 81 or len(set(self.grid).difference(set('1234567890'))) != 0:
        # Check that the csv file is in the correct format:
        # a 9 * 9 grid containing only the digits '1234567890'.
        self.pop_up("Wrong content in csv file. "
                    "Make sure it is a 9 * 9 grid with only '1234567890' digits")
        self.get_filename(init_dir, name, filetype)
        self.grid = utility.load_csv(self.filename)

    # Solve the puzzle here
    self.sudoku = Solver(self.grid)
    t, final_values = self.sudoku.solve()
    if final_values is False:
        self.pop_up('There is no solution for this puzzle. Try another one!')
        return
    else:
        # If the puzzle is solved, write the solution to a csv file and open pygame to demo the result
        output = self.filename[:-4] + '_sol.csv'
        utility.write_csv(output, self.sudoku.final_values)
        self.open_pygame()
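# The handler above assumes utility.load_csv returns the whole puzzle as one flat
# 81-character string ('0' for empty cells), since it checks len(grid) == 81.
# A minimal sketch of such a helper under that assumption (not necessarily the
# project's actual implementation):
import csv

def load_csv_grid_sketch(filename):
    """Read a 9x9 Sudoku csv and return its cells as one flat 81-character string."""
    with open(filename, newline='') as f:
        return ''.join(cell.strip() for row in csv.reader(f) for cell in row)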
def compute_predictions_metrics(name, algorithm, current_results, labels):
    # Compute, for each (dataset, algorithm) pair, the score for every criterion over the RUN_NUMBER runs.
    # Note: this function currently recomputes all metrics if at least one value is missing
    # (cf. the dataset_already_processed() function).
    with open(RESULTS_FILE, 'a+') as result_file:
        # For each of the RUN_NUMBER predictions for this case:
        for run_number in range(RUN_NUMBER):
            # Load the prediction
            print("\tCOMPUTING METRICS VALUES FOR PREDICTIONS WITH " + algorithm
                  + " (" + str(run_number + 1) + "/" + str(RUN_NUMBER) + ")")
            clustering_prediction = utility.load_csv(
                PREDICTED_CLUSTERING_FOLDER + name + "_" + algorithm + "_" + str(run_number))

            # Compute the metrics for prediction number "run_number"
            metrics_dictionnary = compute_prediction_metrics(labels, clustering_prediction)

            # Save only the values that are not already present in the results file
            for criteria in CRITERION_LIST:
                if current_results.find(name + "\t" + algorithm + "\t" + criteria
                                        + "\t" + str(run_number)) == -1:
                    result_file.write(name + "\t" + algorithm + "\t" + criteria + "\t"
                                      + str(run_number) + "\t"
                                      + str(metrics_dictionnary.get(criteria)) + "\n")
        # The 'with' statement closes result_file automatically; no explicit close() is needed.
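# For context, compute_prediction_metrics is expected to return a dict keyed by the
# entries of CRITERION_LIST. A hypothetical sketch using two common external clustering
# criteria; the actual criteria and key names used by the project are assumptions here:
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

def compute_prediction_metrics_sketch(labels, clustering_prediction):
    """Return one score per criterion for a single predicted clustering."""
    return {
        "ARI": adjusted_rand_score(labels, clustering_prediction),
        "NMI": normalized_mutual_info_score(labels, clustering_prediction),
    }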
def from_nodes_csv(file_path):
    obj = Tree()
    data = load_csv(file_path)[1:]
    for v in data:
        node = Node(v[0], v[1])
        obj.attach_node(node)
    return obj
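# Usage sketch: load_csv presumably returns the csv rows as lists (the [1:] slice skips
# a header row), so the first two columns of each row feed Node(v[0], v[1]). The file
# name and "id,parent_id"-style layout below are assumptions for illustration only.
tree = from_nodes_csv("nodes.csv")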
# positional
parser.add_argument("table", type=str, help="data source table name")
# optional
parser.add_argument("-x", "--crosstab", type=str, help="crosstab output")
parser.add_argument("-c", "--column", type=str, help="column name in the table")
parser.add_argument("-r", "--row", type=int, help="number of rows to select")
parser.add_argument("-n", "--bins", type=int, help="number of bins to select")
args = parser.parse_args()

# identify csv / teradata
iscsv = args.table.endswith(".csv")

# read csv / teradata
if iscsv:
    try:
        df = load_csv(args.table, args.row)
    except Exception:
        print("Oops! Data table not found...")
        sys.exit()
else:
    try:
        cnxn = connect_td()
    except Exception:
        print("Oops! Failed to connect to TD...")
        sys.exit()
    try:
        df = load_td(args.table, cnxn, args.row)
    except Exception:
        print("Oops! Data table not found...")
        sys.exit()
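# In this script load_csv is called with (path, row_limit). A minimal pandas-based
# sketch of such a helper, under that assumption (not necessarily the project's
# actual implementation):
import pandas as pd

def load_csv_sketch(path, nrows=None):
    """Read a csv file, optionally limiting the number of rows loaded."""
    return pd.read_csv(path, nrows=nrows)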
from categorize import Categorizer
import utility as ut
import explore as ex
import model as ml
import numpy as np
import transform as tf
from importlib import reload  # 'imp' is deprecated/removed in recent Python versions
reload(ut)

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

data_file = "./data/sales_train.csv"
df = ut.load_csv(data_file)
df.shape
df.columns
df.describe()
ex.get_type_dict(df)

# -----------------------------------------------------------------------------
# clean data: remove nan and remove zero target

# find missing values
ex.count_missing(df)

# remove zero contract price
targetcol = 'CL_Contract_Price'
reload(tf)
reload(ml)
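# ex.count_missing above presumably reports missing values per column. A minimal
# pandas-based sketch of such a helper (an assumption, not the mlib implementation):
def count_missing_sketch(df):
    """Return the number of missing (NaN) values in each column, highest first."""
    return df.isna().sum().sort_values(ascending=False)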
import pandas as pd
import os, sys

# sys.path
mlibpath = r"C:\Users\m038402\Documents\myWork\pythoncodes\mlib"
sys.path.append(mlibpath)

from categorize import Categorizer
import utility as ut
import explore as ex
import model as ml

data_file = "./data/data.csv"
TARGET_F = "TARGET_F"
df = ut.load_csv(data_file)
df.head()

# normal sklearn split
# train_df, test_df = ut.train_test_split_with_target(df, test_size=0.2)

# df must be a data frame
train_df, test_df, train_target, test_target = ut.train_test_split_with_target(
    df, test_size=0.2, target=TARGET_F)
train_df.shape
test_df.shape
train_target.shape
test_target.shape
train_df.columns

trainX = ml.full_pipeline.fit_transform(train_df)
# Fit the pipeline on the training data only; transform the test data with the
# already-fitted pipeline to avoid leaking test statistics into the preprocessing.
testX = ml.full_pipeline.transform(test_df)
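# ut.train_test_split_with_target above appears to split a DataFrame and separate the
# target column in one call. A hypothetical sketch built on sklearn's train_test_split
# (an assumption about mlib's behaviour, not its actual code):
from sklearn.model_selection import train_test_split

def train_test_split_with_target_sketch(df, test_size=0.2, target=None):
    """Split df into train/test feature frames plus the corresponding target columns."""
    X = df.drop(columns=[target])
    y = df[target]
    return train_test_split(X, y, test_size=test_size)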
# sys.path
import sys
mlibpath = r"C:\Users\m038402\Documents\myWork\pythoncodes\mlib"
sys.path.append(mlibpath)

from utility import load_csv, load_td, connect_td
from categorize import Categorizer

data_file = "./data/data.csv"
df = load_csv(data_file)
df.head()

# -------------------------------------------------------------------
# categorizer
bprofile = Categorizer(df, "TARGET_F")
sdf = df.copy()  # categorize() mutates the frame in place, so work on a copy
bprofile.categorize(sdf)
sdf.head()
df.head()

bprofile.get_df_column_type()
bprofile.get_likelihood("CUNCZZ_AGE_YEARS", False)

bprofile = Categorizer(df, "TARGET_F")
bprofile.df.head()
bprofile.get_crosstab("CUNCZZ_AGE_YEARS", False)
# bprofile.unique_values
# bprofile.num_bins
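# Categorizer.get_crosstab above presumably tabulates a (binned) feature against the
# target column. A pandas-based sketch of that idea (an assumption, not the actual
# Categorizer API):
import pandas as pd

def crosstab_sketch(df, column, target="TARGET_F", bins=10):
    """Cross-tabulate a numeric column, cut into bins, against the target."""
    binned = pd.cut(df[column], bins=bins)
    return pd.crosstab(binned, df[target])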
from benchmark import Benchmark
from invocation import Invocation
from execution import Execution

ONLY_QCOMP_2020_BENCHMARKS = False
QComp2020_benchmarks = []

if ONLY_QCOMP_2020_BENCHMARKS and len(QComp2020_benchmarks) == 0:
    from utility import load_csv
    print("\n\nNOTE: Will filter on the 100 benchmarks selected for QComp 2020\n")
    QComp2020_benchmarks = [
        b[0] + "." + b[3] + "." + b[4]
        for b in load_csv("qcomp2020_benchmarks.csv")
    ]
    assert (len(QComp2020_benchmarks) == 100)


def get_name():
    """
    Returns the name of the tool as listed on http://qcomp.org/competition/2020/
    """
    return "DFTRES"  # https://doi.org/10.1016/j.ress.2019.02.004


def is_benchmark_supported(benchmark: Benchmark):
    """
    Returns True if the provided benchmark is supported by the tool and if the
    given benchmark should appear on the generated benchmark list
    """
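# Here load_csv is assumed to return the csv rows as lists of strings, so each filter
# key above is "column0.column3.column4" of a row. A tiny illustration with made-up
# values (the actual column meaning in qcomp2020_benchmarks.csv is an assumption):
example_row = ["modelA", "3", "original", "3-2", "MTTF"]
example_key = example_row[0] + "." + example_row[3] + "." + example_row[4]  # "modelA.3-2.MTTF"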
    cols.append('Label')        # For time-series predictions such as ARMA, supply the Label yourself
    if 'Date' not in df1.columns:
        cols.append('Date')     # For time-series predictions such as ARMA, supply the Date yourself
    if 'StockID' not in df1.columns:
        cols.append('StockID')  # For time-series predictions such as ARMA, supply the StockID yourself
    return addColumns(df1, df2, 'FileName', cols)


# split_date = '2015-11-22'                 # evaluation start date for the small-data baseline
# split_year, split_month, split_day = 2015, 11, 22
split_date = '2016-03-23'                   # large data set
split_year, split_month, split_day = 2016, 3, 23
bARMAWeight = False

fnTmpF = load_csv(
    'validation\\0117bkupFeatureARMA\\svm-big-rbf-OpenClose-noerror-0117-addFeatureARMA.csv')
fnFilter = fnTmpF['FileName'].values

# fnBuyList = 'validation\\TotalreturnResult_1.csv'
fnBuyList = 'validation\\0117bkupFeatureARMA\\svm-big-rbf-OpenClose-noerror-0117-addFeatureARMA.csv'
# fnBuyList = 'validation\\best 50-100-fm-big-OpenClose-noerror-0117-addFeatureARMA - No_FM.csv'
# fnBuyList = 'validation\\0117bkupFeatureARMA\\knn-big-OpenClose-noerror-0117-addFeatureARMA.csv'
# fnBuyList = 'validation\\TotalreturnResult_1.csv'

fndFMData = 'data\\dFM-big-oldLabel.csv'    # contains sentiment polarity and rise/fall information
fnErrorSentiment = 'validation\\errorFileName-baselinebig.txt'
dfM = pandas.read_csv(fndFMData, encoding='utf-8')  # results of the ARMA time-series prediction model
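# addColumns above presumably copies the listed columns from df2 into df1 by matching
# rows on the 'FileName' key. A hypothetical pandas sketch of that behaviour
# (an assumption, not the project's actual helper):
def add_columns_sketch(df1, df2, key, cols):
    """Merge the columns `cols` from df2 into df1, joining on `key`."""
    return df1.merge(df2[[key] + cols], on=key, how='left')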