def dataset():
    # create a temporary dataset
    dataset = manage_dataset.create_dataset(PROJECT_ID)
    # import some data to it
    import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)
    yield dataset
    # tear down
    manage_dataset.delete_dataset(dataset.name)
def dataset():
    # create a temporary dataset
    dataset = manage_dataset.create_dataset(PROJECT_ID)
    # import some data to it
    import_data.import_data(dataset.name, 'VIDEO', INPUT_GCS_URI)
    yield dataset
    # tear down
    manage_dataset.delete_dataset(dataset.name)
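# Both generators above follow pytest's fixture protocol (setup, yield, teardown).
# A minimal sketch, assuming pytest is in use (the capsys-based test near the end
# of this section suggests it), of how such a generator would be registered so a
# test can request it by name; `image_dataset` is an illustrative name:
import pytest

@pytest.fixture
def image_dataset():
    ds = manage_dataset.create_dataset(PROJECT_ID)            # setup
    import_data.import_data(ds.name, 'IMAGE', INPUT_GCS_URI)  # load test data
    yield ds                                                  # value injected into the test
    manage_dataset.delete_dataset(ds.name)                    # teardown after the test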
def cost_function1(DATA_FOLDER1, DATA_FOLDER2):
    stations1 = importd.import_data(DATA_FOLDER1)
    stations2 = importd.import_data(DATA_FOLDER2)
    values1, values2, times, fact = preconditionnement(stations1, stations2)
    cost = 0
    if stations1.keys() == stations2.keys():
        for key in values1.keys():
            for j in range(len(times) - 1):
                # rectangles extending to the right of each sample
                cost += abs(values1[key][j] - values2[key][j]) * (times[j + 1] - times[j])
        return cost * fact * 10**-4
    print("incompatible data")
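# The nested loop above is a rectangle-rule approximation of the integrated
# absolute difference between matched station signals, i.e. (a sketch of the
# intent, assuming `fact` is a scale factor produced by preconditionnement):
#
#   cost ≈ 1e-4 * fact * Σ_k Σ_j |v1_k(t_j) − v2_k(t_j)| · (t_{j+1} − t_j)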
def cost_function0(DATA_FOLDER1, DATA_FOLDER2):
    stations1 = importd.import_data(DATA_FOLDER1)
    stations2 = importd.import_data(DATA_FOLDER2)
    cost = 0
    if stations1.keys() == stations2.keys():
        for keys in stations1.keys():
            cost += (abs(stations1[keys].initial_time - stations2[keys].initial_time) * alpha
                     + abs(stations1[keys].first_peak_value - stations2[keys].first_peak_value) * beta)
        return cost
    print("incompatible data")
def main(): print "Starting Data Import..." import_data.import_data() print "Data Import Complete" raw_input("Press Enter to Start Data Parsing") length = 225 # trading days: 225 days about 10.5 month or 45 weeks event = "pdufa" # choice: "nda", "adcom", "pdufa" goodlist = get_filtered_list(event) directionlist = ["around", "toward", "after"] sizelist = ["all" , "small" , "mid", "large"] for i in range(len(directionlist)): for j in range(len(sizelist)): write_data(goodlist, event, directionlist[i], length, sizelist[j]) print "Data Parsing Complete"
def main(): """ This function contains example code that demonstrates how to use the functions defined in poly_fit_base for fitting polynomial curves to data. """ # choose number of data-points and sample a pair of vectors: the input # values and the corresponding target values # N = 50 # inputs, targets = sample_data(N, arbitrary_function_2, seed=1) wine_data, wine_features = import_data('winequality-red.csv') inputs = wine_data[:, 0:11] targets = wine_data[:, 11] # specify the centres and scale of some rbf basis functions default_centres = np.linspace(0,1,21) default_scale = 0.03 default_reg_param = 0.08 # get the cross-validation folds num_folds = 5 folds = create_cv_folds(inputs.shape[0], num_folds) # # evaluate then plot the performance of different reg params evaluate_reg_param(inputs, targets, folds, default_centres, default_scale) # # evaluate then plot the performance of different scales # evaluate_scale(inputs, targets, folds, default_centres, default_reg_param) # # evaluate then plot the performance of different numbers of basis # # function centres. evaluate_num_centres( inputs, targets, folds, default_scale, default_reg_param) plt.show()
def main():
    wine_data, wine_features = import_data('winequality-red.csv')
    # matrix of features
    inputmtx = wine_data[:, 0:11]
    # array of targets
    targets = wine_data[:, 11]
    N = len(targets)
    # set the k's and the number of cross-validation folds
    Ks = 160
    num_folds = 10
    # create cv folds
    folds = create_cv_folds(N, num_folds)
    # returns, per fold, a k x M matrix of rmse errors
    errormtxs = cv_evaluation(inputmtx, targets, folds, Ks)
    errormean = np.zeros((Ks, 5))
    for fold in errormtxs:
        matrix = errormtxs[fold]
        errormean = matrix + errormean
    errormean = errormean / num_folds
    threemtx = errormean[0, :, :]
    ks = np.linspace(1, Ks, Ks)
    display_error_graphs(threemtx, ks)
    print("AVERAGE ERRORS")
    print_errors_2d_mtx(threemtx, ks)
def poly_model_reg(): """ Loops through a range of different regression coefficents to find the optimum then plots the error and comparison graphs """ wine_data, wine_features = import_data('winequality-red.csv') inputmtx = wine_data[:, 0:11] # inputmtx = wine_data[:, [1,9,10]] # For the improved Regression Answer targets = wine_data[:, 11] train_inputmtx, train_targets, test_inputmtx, test_targets = \ cross_validation(inputmtx, targets, 0.25) reg_coeffs = np.linspace(0, 1, 51) degrees = np.linspace(1, 15, 15) errormtx = np.zeros((len(reg_coeffs), len(degrees))) threemtx = np.zeros((len(reg_coeffs), len(degrees), 5)) for i in range(int(len(errormtx))): for j in range(int(len(errormtx[i]))): outputmtx = expand_to_2Dmonomials(train_inputmtx, int(degrees[j])) weights = regularised_ml_weights(outputmtx, train_targets, reg_coeffs[i]) prediction_func = construct_3dpoly(int(degrees[j]), weights) prediction_values = prediction_func(test_inputmtx) errorarr = error_score(test_targets, prediction_values) for k in range(0, 5): threemtx[i, j, k] = errorarr[k] display_error_graphs(threemtx, degrees) display_3d_error_graphs(threemtx, reg_coeffs, degrees) print_errors_3d_mtx(threemtx, reg_coeffs, degrees)
def mean_hr_bpm(filename):
    """module to take user input for time scale, and analyze ECG input in
    that time scale

    :param filename: the name of a file located in the /test_data folder
        entered as a string
    :returns heartrate: heart rate during a specific period as a float
    :raises IOError: raised if user tries to input a value not accepted by
        the program
    :raises ValueError: raised if the generally accepted values fall outside
        of the signal time range
    """
    # time_input = input("Please input time (10 sec or 20 sec): ")
    time_input = "10 sec"
    time_vector = extract_time_data(filename)
    if np.max(time_vector) >= float(time_input[:-4]):
        if str(time_input) == "10" + " sec":
            ind = np.where(time_vector == 10)[0][0]  # first index where time == 10 s
            df = import_data(filename)
            values = df.values
            trimmed = values[np.arange(0, ind), 1]
            trim_norm = trimmed - np.mean(trimmed)
            template = pd.read_csv("test_data/template.csv", header=None)
            norm_template = extract_template_data(template)
            corr = np.correlate(norm_template, trim_norm, mode="full")
            peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
            heartrate = len(peaks) / (10 / 60)
        elif str(time_input) == "20" + " sec":
            ind = np.where(time_vector == 20)[0][0]  # first index where time == 20 s
            df = import_data(filename)
            values = df.values
            trimmed = values[np.arange(0, ind), 1]
            trim_norm = trimmed - np.mean(trimmed)
            template = pd.read_csv("test_data/template.csv", header=None)
            norm_template = extract_template_data(template)
            corr = np.correlate(norm_template, trim_norm, mode="full")
            peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
            heartrate = len(peaks) / (20 / 60)
        else:
            raise IOError("Invalid input. Try again (make sure to include "
                          "sec)")
    else:
        raise ValueError("Attempted input outside signal range")
    return heartrate
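# The two branches above differ only in the analysis window. A minimal refactor
# sketch, assuming the same helpers (extract_time_data, import_data,
# extract_template_data) and the same template file, that parameterises the
# window length instead of duplicating the pipeline:
def hr_over_window(filename, window_sec):
    """Count correlation peaks in the first `window_sec` seconds of the ECG
    and convert the count to beats per minute."""
    time_vector = extract_time_data(filename)
    if np.max(time_vector) < window_sec:
        raise ValueError("Attempted input outside signal range")
    ind = np.where(time_vector == window_sec)[0][0]
    values = import_data(filename).values
    trim_norm = values[:ind, 1] - np.mean(values[:ind, 1])
    template = pd.read_csv("test_data/template.csv", header=None)
    norm_template = extract_template_data(template)
    corr = np.correlate(norm_template, trim_norm, mode="full")
    peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
    return len(peaks) / (window_sec / 60)  # beats per minute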
def import_ref(self): file_name = "spectra.inp" output_file = os.path.join(self.path, file_name) config_file = os.path.join(self.path, file_name + "import.inp") self.im = import_data(output_file, config_file) self.im.run() self.update()
def import_data(self):
    target = os.path.join(get_sim_path(), "fit_data" + str(self.index) + ".inp")
    config = os.path.join(get_sim_path(), "fit_import_config" + str(self.index) + ".inp")
    self.im = import_data(target, config)
    self.im.run()
    self.update()
def output_test():
    data = import_data.import_data('Align_Pixel_RGB1.csv')
    data = shuffle(data)
    train_size = 100000
    test_size = 3000
    print('--------------start split data----------------')
    data_test = data[-test_size:]
    data_test.to_csv('Align_Pixel_test.csv', index=None)
def correlation(distance_field, data_folder):
    stations = import_data(data_folder)
    initial_detection_times = np.array(
        [stations[s].initial_time for s in stations])
    print(initial_detection_times)
    normalized_initial_detection_times = initial_detection_times / np.linalg.norm(
        initial_detection_times)
    normalized_distance_field = distance_field / np.linalg.norm(
        distance_field, axis=2, keepdims=True)
    # return np.linalg.norm(normalized_distance_field - normalized_initial_detection_times, axis=2)
    return np.tensordot(normalized_distance_field,
                        normalized_initial_detection_times,
                        axes=[2, 0])
def import_ref(self):
    file_name = None
    if self.notebook.tabText(self.notebook.currentIndex()).strip() == _("Absorption"):
        file_name = "alpha.omat"
    if self.notebook.tabText(self.notebook.currentIndex()).strip() == _("Refractive index"):
        file_name = "n.omat"
    if file_name is not None:
        output_file = os.path.join(self.path, file_name)
        config_file = os.path.join(self.path, file_name + "import.inp")
        self.im = import_data(output_file, config_file)
        self.im.run()
        self.update()
def poly_model_reg(): """ Loops through a range of different regression coefficents to find the optimum then plots the error and comparison graphs """ # Collects wine data and spilts accoriding to features (inputs) and labels (targets) wine_data, wine_features = import_data('winequality-red.csv') # inputmtx = wine_data[:, 0:11] # print(wine_features[0,1,2,4,6,7,9,10]) inputmtx = wine_data[:, (0,1,2,4,6,7,9,10)] targets = wine_data[:, 11] # Reserve data for model validation inputmtx, targets, final_inputs, final_targets = cross_validation(inputmtx, targets, 0.1) # Create Folds num_folds = 10 N = len(targets) folds = create_cv_folds(N, num_folds) # Set variables and then train and test the model for each fold reg_coeffs = np.linspace(0, 1, 51) degrees = np.linspace(1, 5, 5) errors, weights = cv_evaluation_poly_model(inputmtx, targets, folds, degrees, reg_coeffs) # Collocate errors for each fold to and find the mean errors. errormean = np.zeros((len(reg_coeffs), len(degrees), 5)) for fold in errors: matrix = errors[fold] errormean = matrix + errormean errormean = errormean / num_folds errormean = errormean[0, :, :, :] # Print Errors min_reg, min_deg = print_errors_3d_mtx(errormean, reg_coeffs, degrees) error_deg = errormean[0, :, :] display_error_graphs(error_deg, degrees, "Degree Factor", "Change in Polynomial Degrees", "deg") error_reg = errormean[:, 2, :] display_error_graphs(error_reg, reg_coeffs, "Regression Coefficient ($\lambda$)", "Change in Regression Coefficient", "reg") # Create aggreate final model across all the folds: min_reg_index = reg_coeffs.tolist().index(min_reg) min_deg_index = degrees.tolist().index(min_deg) weightsksize = int(max(degrees) * inputmtx.shape[1]) weightsmean = findbestweights(weights, min_reg_index, min_deg_index, weightsksize) test_optimised_model(min_deg, weightsmean, final_inputs, final_targets)
def extract_time_data(filename):
    """pulls time data out of pandas data frame from ECG input

    :param filename: the name of a file located in the /test_data folder
        entered as a string
    :returns time: array of time values from ECG input
    """
    from import_data import import_data
    df = import_data(filename)
    values = df.values
    time = values[:, 0]
    logging.info("extract_time_data: time data found")
    logging.debug("time=" + str(time))
    return time
def SubsetSLEProjections(sample_dict):
    # SLE projection directory
    # sle_dir = "191220_emulated"
    # sle_dir = "201011_proj_TIMESERIES"
    sle_dir = "2lm_projections"

    # Initialize the output dictionary
    sle_dict = {"ice_source": [], "region": [], "year": [],
                "scenario-sample": [], "GSAT": [], "SLE": []}

    # Loop over the required SSPs from the sample dictionary
    for this_scenario in sample_dict.keys():
        # Open this matched scenario file
        filename = os.path.join(
            sle_dir, "projections_FAIR_{0}.csv".format(this_scenario.upper()))
        this_sle_dict = import_data(filename, "FAIR")

        # Filter this data for the appropriate samples
        this_sle_dict = filter_data(this_sle_dict, "FAIR",
                                    sample=sample_dict[this_scenario],
                                    ice_source="Glaciers")

        # Append these data to the output structure
        sle_dict["ice_source"].extend(this_sle_dict["ice_source"])
        sle_dict["region"].extend(this_sle_dict["region"])
        sle_dict["year"].extend(this_sle_dict["year"])
        sle_dict["GSAT"].extend(this_sle_dict["GSAT"])
        sle_dict["SLE"].extend(this_sle_dict["SLE"])

        # Add a field called "scenario-sample" to the dictionary
        scenario_sample = [
            "{0}-{1}".format(this_scenario, x) for x in this_sle_dict["sample"]
        ]
        sle_dict["scenario-sample"].extend(scenario_sample)

    # Convert everything over into numpy arrays
    sle_dict["ice_source"] = np.array(sle_dict["ice_source"])
    sle_dict["region"] = np.array(sle_dict["region"])
    sle_dict["year"] = np.array(sle_dict["year"])
    sle_dict["GSAT"] = np.array(sle_dict["GSAT"])
    sle_dict["SLE"] = np.array(sle_dict["SLE"])
    sle_dict["scenario-sample"] = np.array(sle_dict["scenario-sample"])

    # Return the sea level projection dictionary
    return sle_dict
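# A hypothetical call, assuming sample_dict maps scenario names (matching the
# projections_FAIR_<SCENARIO>.csv files) to arrays of sample indices; the names
# and sizes below are illustrative only:
# sample_dict = {"ssp126": np.arange(500), "ssp585": np.arange(500)}
# sle = SubsetSLEProjections(sample_dict)
# print(sle["scenario-sample"][:3], sle["SLE"].shape)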
def extract_voltage_data(filename):
    """pulls voltage data out of pandas data frame and normalizes values

    :param filename: the name of a file located in the /test_data folder
        entered as a string
    :returns norm_voltage: normalized voltage data
    """
    from import_data import import_data
    df = import_data(filename)
    values = df.values
    voltage = values[:, 1]
    norm_voltage = voltage - np.mean(voltage)
    logging.info("extract_voltage_data: norm_voltage found")
    logging.debug("norm_voltage=" + str(norm_voltage))
    return norm_voltage
def duration(filename):
    """module that determines the duration of the ECG input signal

    :param filename: the file name of the ECG input
    :returns duration: duration of signal as a float (sec)
    """
    import pandas as pd
    from import_data import import_data
    logging.info("duration: everything imported")
    df = import_data(filename)
    df.columns = ["time", "voltage"]
    duration = df["time"].max()
    logging.info("duration: duration found")
    logging.debug("duration=" + str(duration))
    return duration
def main():
    wine_data, wine_features = import_data('winequality-red.csv')
    inputmtx = wine_data[:, 0:10]
    targets = wine_data[:, 11]
    train_inputmtx, train_targets, test_inputmtx, test_targets = cross_validation(
        inputmtx, targets, 0.25)
    train = wine_data[0:250, :]
    target = wine_data[250:1599, :]
    N = len(wine_data[:, 11])
    print(N)
    folds = create_cv_folds(N, 2)
    print(folds)
def voltage_extremes(filename):
    """module to calculate the maximum and minimum lead voltages of the input
    ECG data

    :param filename: the name of a file located in the /test_data folder
        entered as a string
    :returns voltage_extremes: lead voltage minimum and maximum as tuple (mV)
    """
    import pandas as pd
    from import_data import import_data
    df = import_data(filename)
    df.columns = ["time", "voltage"]
    voltage_min = df["voltage"].min()
    voltage_max = df["voltage"].max()
    voltage_extremes = (voltage_min, voltage_max)
    logging.info("voltage_extremes: voltage_extremes found")
    logging.debug("voltage_extremes=" + str(voltage_extremes))
    return voltage_extremes
def poly_model_reg():
    """
    Loops through a range of different regression coefficients to find the
    optimum, then plots the error and comparison graphs
    """
    wine_data, wine_features = import_data('winequality-red.csv')
    inputmtx = wine_data[:, 0:11]
    # inputmtx = wine_data[:, (0, 1, 2, 4, 6, 7, 9, 10)]
    targets = wine_data[:, 11]
    # Create folds
    num_folds = 10
    N = len(targets)
    folds = create_cv_folds(N, num_folds)
    errormtxs, uppers, lowers, prediction_values = cv_evaluation_poly_model(
        inputmtx, targets, folds, num_folds)
    preds_arry = np.zeros(160)
    upper_arr = np.zeros(160)
    lower_arr = np.zeros(160)
    for x in range(0, prediction_values.shape[1]):
        preds_arry[x] = np.mean(prediction_values[:, x])
        upper_arr[x] = np.mean(uppers[:, x])
        lower_arr[x] = np.mean(lowers[:, x])
    errormean = np.zeros((1, 5))
    for fold in errormtxs:
        matrix = errormtxs[fold]
        errormean = matrix + errormean
    errormean = errormean / num_folds
    errormean = errormean[0, :, :]
    it_priors = np.linspace(1, 50, 50)
    display_error_graphs(errormean, it_priors, "Number of Priors",
                         "Change in Priors", "priors")
    display_bayesian_confidence_graph(preds_arry, upper_arr, lower_arr)
    min_rsme_index = np.argmin(errormean, 0)[0]
    print_error_score(errormean[min_rsme_index])
import functions
import import_data  # needed below for import_data.import_data()
import numpy as np
import pandas as pd
from scipy import signal
import seaborn as sns
from sklearn.preprocessing import PowerTransformer
import matplotlib.pyplot as plt
# from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib import rc

rc('font', **{'family': 'serif', 'serif': ['Roman']})
rc('text', usetex=True)
plt.rcParams.update({'font.size': 16})
# assigning the preamble twice would overwrite the first package, so load both at once
plt.rcParams['text.latex.preamble'] = [r"\usepackage{amsmath}", r"\usepackage{bm}"]

no2, nox, pm10, meteo, time = import_data.import_data()

# =============================================================================
# Data
# =============================================================================
plt.figure(figsize=(10, 4))
april = np.logical_and(time >= np.datetime64('2010-04-01'),
                       time <= np.datetime64('2010-04-30'))
plt.plot(time[april], no2[0][april], 'k')
plt.xticks(ticks=np.arange('2010-04-01', '2010-04-30',
                           np.timedelta64(7, 'D'), dtype='datetime64[D]'),
    temp = []
    print(entities_to_progress)
    # Process entities, get WORD_DIST words before and after entities
    for entities in entities_to_progress:
        start = max(sent.start, entities[0].start - word_dist)
        end = min(sent.end, entities[-1].end + word_dist)
        result = doc[start:end]
        result._.entities = entities
        sentence_results.append(result)
    article_results.append(sentence_results)
    if len(article_results) > 0:
        # print(article_results)
        results.append(article_results)
    return results


# Test
if __name__ == '__main__':
    nlp = spacy.load('nl_core_news_sm')
    data = import_data('hetongelukscraped.csv')
    articles = data['Artikel']
    mydata = articles[:10]
    results = get_location_descriptions(mydata, nlp)
import sys

sys.path.append("python_common")
sys.path.append("mosr_back_orm")

ps = 0
try:
    db_session = create_session()
    # read the polling interval from the system parameters
    polling_second = db_session.query(SystemPar).filter(
        SystemPar.par_code == 'polling_second').one()
    ps = int(polling_second.par_value)
    db_session.commit()
except:
    db_session.rollback()
    raise
finally:
    db_session.close()

print("Database connection succeeded; polling interval: " + str(ps))

if ps > 0:
    while True:
        # download_data()
        import_data()
        clean_neo4j()
        neo4j_command()
        unionFind()
        time.sleep(ps)
""" Prepare train and test user IDs Creates train.txt and test.txt """ from random import shuffle from import_data import import_data, make_df data = import_data() df = make_df(data) users = df['userId'].unique() shuffle(users) n = len(users) TRAIN_LEN = round(0.8 * n) train = users[:TRAIN_LEN] TEST_LEN = 10 test = users[TRAIN_LEN:TRAIN_LEN + TEST_LEN] with open('train.txt', 'w') as f: f.write('\n'.join(train)) with open('test.txt', 'w') as f: f.write('\n'.join(test))
#############################################
just_pct = True  # True or False: if True, adjusts pct using the top line
#############################################
# Optionally change std:
#############################################
std = np.array([1., 2, 1.2, 1.4, 1.9, 1.5, 2.1, 2.7, 2.5, 1.1])
#############################################
# DO NOT EDIT:
#############################################
print('----------------------------------------------------------')
print('Loading data from:', filen)
print('Writing data to files beginning with:', filen_ut)
print('----------------------------------------------------------')
print('Running', its, 'iterations')
pct_fylker, st_tall_f, partier, fylker, hele_l = import_data(filen)
print('----------------------------------------------------------')
print('Parties:', partier)
ant_dirm_fylker = np.array(
    [9, 17, 19, 7, 7, 9, 7, 6, 4, 6, 14, 16, 4, 9, 10, 5, 9, 6, 5]) - 1

# generate st. dev matrix
ant_fylk = len(fylker)
ant_part = len(partier)
std_f = np.zeros([ant_fylk, ant_part])
for i in np.arange(ant_fylk):
    std_f[i, :] = std

ind_mdg = partier.index("MDG")
hele_l = np.asarray(hele_l).astype(float)  # np.float is removed in newer numpy
""" Authors: Mrunmayee Deshpande, Lu Gan, Bruce Huang, Abhishek Venkataraman """ import timeit from skrvm import RVC import numpy as np import os.path import scipy.io from import_data import import_data ## Set data path parsed_data_path = 'parsed_data/' [X, Y, valX, valY, testX, testY] = import_data(parsed_data_path) scipy.io.savemat('train.mat', dict(X=X, Y=Y)) scipy.io.savemat('val.mat', dict(valX=valX, valY=valY)) scipy.io.savemat('test.mat', dict(testX=testX, testY=testY)) ## Train a RVM clf = RVC(verbose=True) print(clf) clf.fit(valX, valY) clf.score(testX, testY)
def get_valid_performance(in_parser, out_itr, evalTime=None, MAX_VALUE=-99, OUT_ITERATION=5):
    """
    Trains the Marginal DeepPseudo model and gives the validation C-index
    performance for random search.

    Arguments:
        - in_parser: dictionary of hyperparameters
        - out_itr: indicator of set of 5-fold cross validation datasets
        - evalTime: None or a list (e.g. [12, 60]). Evaluation times at which
          the validation performance is measured
        - MAX_VALUE: maximum validation value
        - OUT_ITERATION: total number of the sets of cross-validation data

    Returns:
        - the validation performance of the trained network
        - saves the trained network in the folder directed by
          "in_parser['out_path'] + '/itr_' + str(out_itr)"
    """
    ## Define a list of continuous columns from the covariates
    continuous_columns = [
        'feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6',
        'feature7', 'feature8', 'feature9', 'feature10', 'feature11', 'feature12'
    ]
    ## If there are categorical variables in the covariates, define a list of
    ## the categorical variables

    ## Import the attributes
    (tr_data, tr_time, tr_label, y_train,
     va_data, va_time, va_label, y_val,
     te_data, te_time, te_label, y_test,
     num_Category, num_Event, num_evalTime, x_dim) = import_data(
        out_itr, evalTime, categorical_columns=None,
        continuous_columns=continuous_columns)

    y_train1 = y_train[:, 0, :]  # pseudo values for CIF for cause 1
    y_train2 = y_train[:, 1, :]  # pseudo values for CIF for cause 2

    ## Hyper-parameters
    ACTIVATION_FN = {'selu': tf.nn.selu, 'elu': tf.nn.elu,
                     'tanh': tf.nn.tanh, 'relu': tf.nn.relu}
    mb_size = in_parser['mb_size']
    iteration = in_parser['iteration']
    keep_prob = in_parser['keep_prob']
    lr_train = in_parser['lr_train']
    initial_W = tf.contrib.layers.xavier_initializer()

    ## Make dictionaries
    # Input dimensions
    input_dims = {
        'x_dim': x_dim,
        'num_Event': num_Event,
        'num_Category': num_Category,
        'num_evalTime': len(evalTime)
    }
    # Network hyper-parameters
    network_settings = {
        'num_units_shared': in_parser['num_units_shared'],
        'num_layers_shared': in_parser['num_layers_shared'],
        'num_units_CS': in_parser['num_units_CS'],
        'num_layers_CS': in_parser['num_layers_CS'],
        'activation_fn': ACTIVATION_FN[in_parser['activation_fn']],
        'initial_W': initial_W
    }

    file_path_final = in_parser['out_path'] + '/itr_' + str(out_itr)
    # change parameters...
    if not os.path.exists(file_path_final + '/models/'):
        os.makedirs(file_path_final + '/models/')

    ## Use GPU
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    ## Call the Marginal DeepPseudo model
    model = CS_Marginal_DeepPseudo_Model(sess, "CS_Marginal_DeepPseudo",
                                         input_dims, network_settings)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    max_valid = -99
    stop_flag = 0

    ### Training - main
    print("MAIN TRAINING ...")
    print("EVALUATION TIMES: " + str(evalTime))

    avg_loss = 0
    for itr in range(iteration):
        if stop_flag > 10:  # for faster early stopping
            break
        x_mb, y1_mb, y2_mb = f_get_minibatch(mb_size, tr_data, y_train1, y_train2)  # get the minibatches
        DATA = (x_mb, y1_mb, y2_mb)
        _, loss_curr = model.train(DATA, keep_prob, lr_train)  # train the model
        avg_loss += loss_curr / 1000

        if (itr + 1) % 1000 == 0:
            print('|| ITR: ' + str('%04d' % (itr + 1)) + ' | Loss: '
                  + colored(str('%.4f' % avg_loss), 'yellow', attrs=['bold']))
            avg_loss = 0

        ### Validation based on the average C-index
        if (itr + 1) % 1000 == 0:
            ### Prediction for validation data
            pred = model.predict(va_data)
            ### Evaluation on validation data
            val_result = np.zeros([num_Event, len(evalTime)])
            for t, t_time in enumerate(evalTime):
                eval_horizon = int(t_time)
                if eval_horizon >= num_Category:
                    print('ERROR: evaluation horizon is out of range')
                    val_result[:, t] = -1
                else:
                    risk = pred[:, :, t]  # risk score until evalTime
                    for k in range(num_Event):
                        # weighted c-index calculation for validation data
                        val_result[k, t] = weighted_c_index(
                            tr_time, (tr_label[:, 0] == k + 1).astype(int), risk[:, k],
                            va_time, (va_label[:, 0] == k + 1).astype(int), eval_horizon)
            tmp_valid = np.mean(val_result)  # average weighted C-index

            if tmp_valid > max_valid:
                stop_flag = 0
                max_valid = tmp_valid
                print('updated.... average c-index = ' + str('%.4f' % tmp_valid))
                if max_valid > MAX_VALUE:
                    saver.save(sess, file_path_final + '/models/model_itr_' + str(out_itr))
            else:
                stop_flag += 1

    return max_valid
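# A hypothetical `in_parser` dictionary wiring up every key the function above
# reads; the values are illustrative placeholders, not tuned settings:
example_parser = {
    'mb_size': 32, 'iteration': 50000, 'keep_prob': 0.6, 'lr_train': 1e-4,
    'num_units_shared': 50, 'num_layers_shared': 2,
    'num_units_CS': 50, 'num_layers_CS': 2,
    'activation_fn': 'relu', 'out_path': 'results',
}
# best_c_index = get_valid_performance(example_parser, out_itr=0, evalTime=[12, 60])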
    model_coefficients = np.dot(np.linalg.inv(np.array(matrix)), output)

    def model(points):
        out = 0
        for a in range(n_coefficients):
            model_term = 1
            for b in range(dimensions):
                model_term = model_term * points[b] ** model_exponents[b, a]
            model_term = model_term * model_coefficients[a]
            out = out + model_term
        return out

    return model


# Usage: [file_name] ["plot"] [start] [end] [y_start] [y_end] or
#        [file_name] [data1] ... [data_n]
if __name__ == "__main__":
    import sys

    data_outer, output_outer = import_data(sys.argv[1])
    sys_model = overfit(data_outer, output_outer)
    if sys.argv[2].lower() == "plot":
        plot(sys_model, [(int(sys.argv[3]), int(sys.argv[4])),
                         (int(sys.argv[5]), int(sys.argv[6]))])
    else:
        test_case = []
        for data_index, _ in enumerate(data_outer[0]):
            test_case.append(int(sys.argv[data_index + 2]))
        print('Prediction: %f' % sys_model(test_case))
import time

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split  # sklearn.cross_validation is deprecated
from multiprocessing import Process, Pool

import import_data


def svm_paralle(svm, x_train, y_train):
    svm.fit(x_train, y_train)
    print('over')
    return svm


if __name__ == '__main__':
    print('import data')
    data = import_data.import_data('Align_Pixel_RGB1.csv')
    data = shuffle(data)
    print('--------------start split data----------------')
    y = data.pop('o_label')
    x = data
    train_size = 10000
    test_size = 3000
    data_test = import_data.import_data('Align_Pixel_test.csv')
    y_test = data_test.pop('o_label').values
    x_test = data_test.values
    print('--------------start create model----------------')
    svm_rbf = SVC(C=10.0, kernel='rbf', degree=3, gamma=0.00001,
import psycopg2
import sys
import theUI
from manage_db import manage_db
from import_data import import_data

if __name__ == '__main__':
    # manage_db() initializes and manages the connection to the database
    mydb = manage_db('localhost', 'verkefni2', 'postgres', 'postgres')
    if mydb.missingData():
        # import_data() imports the data to the postgresql database
        data = import_data(mydb)
    # Creates an average-rating table from the information given, to shorten the query time
    mydb.createAverageRatingsTable()
    # Starts the GUI for browsing the data
    window = theUI.loadUI(mydb)
def test_import_data(capsys, dataset):
    import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)
    out, _ = capsys.readouterr()
    assert 'Dataset resource name: ' in out
import psycopg2
import sys
import theUI
from manage_db import manage_db
from import_data import import_data

if __name__ == '__main__':
    mydb = manage_db('localhost', 'verkefni3', 'postgres', 'postgres')
    importer = import_data(mydb)
    if mydb.missingData():
        print("Looking for data")
        importer.findData()
        print("Got the data")
    window = theUI.loadUI(mydb, importer)