Example #1
def dataset():
    # create a temporary dataset
    dataset = manage_dataset.create_dataset(PROJECT_ID)

    # import some data to it
    import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)

    yield dataset

    # tear down
    manage_dataset.delete_dataset(dataset.name)
Example #2
def dataset():
    # create a temporary dataset
    dataset = manage_dataset.create_dataset(PROJECT_ID)

    # import some data to it
    import_data.import_data(dataset.name, 'VIDEO', INPUT_GCS_URI)

    yield dataset

    # tear down
    manage_dataset.delete_dataset(dataset.name)
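The two `dataset()` generators above follow the pytest setup/`yield`/teardown pattern, and the test in Example #38 later in this listing consumes such a fixture by parameter name. A minimal sketch of how the pieces fit together, assuming the `@pytest.fixture` decorator and the same module-level names (`manage_dataset`, `import_data`, `PROJECT_ID`, `INPUT_GCS_URI`) used in the snippets above:

import pytest


@pytest.fixture(scope="function")
def dataset():
    # create a temporary dataset and hand it to the requesting test
    dataset = manage_dataset.create_dataset(PROJECT_ID)
    import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)
    yield dataset
    # tear down once the test has finished
    manage_dataset.delete_dataset(dataset.name)


def test_import_data(capsys, dataset):
    # pytest injects the object yielded by the fixture above
    import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)
    out, _ = capsys.readouterr()
    assert 'Dataset resource name: ' in out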
Example #3
def cost_function1(DATA_FOLDER1, DATA_FOLDER2):

    stations1 = importd.import_data(DATA_FOLDER1)
    stations2 = importd.import_data(DATA_FOLDER2)
    values1, values2, times, fact = preconditionnement(stations1, stations2)
    cost = 0

    if stations1.keys() == stations2.keys():
        for key in values1.keys():
            for j in range(len(times) - 1):
                cost += abs(values1[key][j] - values2[key][j]) * (
                    times[j + 1] - times[j])  # rectangles to the right

        return cost * fact * 10**-4
    print("incompatible data")
Example #4
def cost_function0(DATA_FOLDER1, DATA_FOLDER2):

    stations1 = importd.import_data(DATA_FOLDER1)
    stations2 = importd.import_data(DATA_FOLDER2)
    cost = 0

    if stations1.keys() == stations2.keys():
        for keys in stations1.keys():
            cost += abs(stations1[keys].initial_time -
                        stations2[keys].initial_time) * alpha + abs(
                            stations1[keys].first_peak_value -
                            stations2[keys].first_peak_value) * beta

        return cost
    print("incompatible data")
Example #5
def main():
    print("Starting Data Import...")
    import_data.import_data()
    print("Data Import Complete")
    input("Press Enter to Start Data Parsing")
    length = 225  # trading days: 225 days is roughly 10.5 months, or 45 weeks
    event = "pdufa"  # choices: "nda", "adcom", "pdufa"
    goodlist = get_filtered_list(event)
    directionlist = ["around", "toward", "after"]
    sizelist = ["all", "small", "mid", "large"]

    for direction in directionlist:
        for size in sizelist:
            write_data(goodlist, event, direction, length, size)
    print("Data Parsing Complete")
def main():
    """
    This function contains example code that demonstrates how to use the 
    functions defined in poly_fit_base for fitting polynomial curves to data.
    """

    # choose number of data-points and sample a pair of vectors: the input
    # values and the corresponding target values
    # N = 50
    # inputs, targets = sample_data(N, arbitrary_function_2, seed=1)
    wine_data, wine_features = import_data('winequality-red.csv')
    inputs = wine_data[:, 0:11]
    targets = wine_data[:, 11]

    # specify the centres and scale of some rbf basis functions
    default_centres = np.linspace(0,1,21)
    default_scale = 0.03
    default_reg_param = 0.08

    # get the cross-validation folds
    num_folds = 5
    folds = create_cv_folds(inputs.shape[0], num_folds)

    # # evaluate then plot the performance of different reg params
    evaluate_reg_param(inputs, targets, folds, default_centres, default_scale)
    # # evaluate then plot the performance of different scales
    # evaluate_scale(inputs, targets, folds, default_centres, default_reg_param)
    # # evaluate then plot the performance of different numbers of basis
    # # function centres.
    evaluate_num_centres(
        inputs, targets, folds, default_scale, default_reg_param)

    plt.show()
Example #7
def main():

    wine_data, wine_features = import_data('winequality-red.csv')

    #matrix of features
    inputmtx = wine_data[:, 0:11]

    #array of targets
    targets = wine_data[:, 11]
    N = len(targets)

    # set the k's and number of cross-v folds
    Ks = 160
    num_folds = 10

    # create cv folds
    folds = create_cv_folds(N, num_folds)

    # return an error of k x M rmse errors
    errormtxs = cv_evaluation(inputmtx, targets, folds, Ks)

    errormean = np.zeros((Ks, 5))
    for fold in errormtxs:
        matrix = errormtxs[fold]
        errormean = matrix + errormean

    errormean = errormean / num_folds

    threemtx = errormean[0, :, :]

    ks = np.linspace(1, Ks, Ks)
    display_error_graphs(threemtx, ks)

    print("AVERAGE ERRORS")
    print_errors_2d_mtx(threemtx, ks)
Example #8
def poly_model_reg():
    """
    Loops through a range of regression coefficients to find the optimum,
    then plots the error and comparison graphs.
    """
    wine_data, wine_features = import_data('winequality-red.csv')
    inputmtx = wine_data[:, 0:11]
    # inputmtx = wine_data[:, [1,9,10]] # For the improved Regression Answer
    targets = wine_data[:, 11]
    train_inputmtx, train_targets, test_inputmtx, test_targets = \
        cross_validation(inputmtx, targets, 0.25)
    reg_coeffs = np.linspace(0, 1, 51)
    degrees = np.linspace(1, 15, 15)
    errormtx = np.zeros((len(reg_coeffs), len(degrees)))
    threemtx = np.zeros((len(reg_coeffs), len(degrees), 5))

    for i in range(len(errormtx)):
        for j in range(len(errormtx[i])):
            outputmtx = expand_to_2Dmonomials(train_inputmtx, int(degrees[j]))
            weights = regularised_ml_weights(outputmtx, train_targets,
                                             reg_coeffs[i])
            prediction_func = construct_3dpoly(int(degrees[j]), weights)
            prediction_values = prediction_func(test_inputmtx)
            errorarr = error_score(test_targets, prediction_values)
            for k in range(0, 5):
                threemtx[i, j, k] = errorarr[k]

    display_error_graphs(threemtx, degrees)
    display_3d_error_graphs(threemtx, reg_coeffs, degrees)

    print_errors_3d_mtx(threemtx, reg_coeffs, degrees)
Example #9
def mean_hr_bpm(filename):
    """module to take user input for time scale, and analyze ECG input in \
    that time scale

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns heartrate: heartrate during a specific period as a float
    :raises IOError: raised if user tries to input value not accepted by \
    program
    :raises ValueError: raised if the generally accepted values fall outside \
    of the signal time range
    """

    #   time_input = input("Please input time (10 sec or 20 sec): ")
    time_input = "10 sec"
    time_vector = extract_time_data(filename)
    if np.max(time_vector) >= float(time_input[:-4]):
        if str(time_input) == "10" + " sec":
            ind = np.where(time_vector == 10)[0]
            df = import_data(filename)
            values = df.values
            trimmed = values[np.arange(0, ind), 1]
            trim_norm = trimmed - np.mean(trimmed)
            template = pd.read_csv("test_data/template.csv", header=None)
            norm_template = extract_template_data(template)
            corr = np.correlate(norm_template, trim_norm, mode="full")
            peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
            heartrate = len(peaks) / (10 / 60)
        elif str(time_input) == "20" + " sec":
            ind = np.where(time_vector == 20)[0]
            df = import_data(filename)
            values = df.values
            trimmed = values[np.arange(0, ind), 1]
            trim_norm = trimmed - np.mean(trimmed)
            template = pd.read_csv("test_data/template.csv", header=None)
            norm_template = extract_template_data(template)
            corr = np.correlate(norm_template, trim_norm, mode="full")
            peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
            heartrate = len(peaks) / (20 / 60)
        else:
            raise IOError("Invalid input. Try Again (Make sure to include "
                          "sec)")
    else:
        raise ValueError("Attempted input outside signal range")
    return heartrate
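The 10-second and 20-second branches above are identical except for the window length. A hedged refactoring sketch that factors the window out, assuming the same helpers (`extract_time_data`, `import_data`, `extract_template_data`) are importable and replacing the hard-coded `time_input` with a parameter:

import numpy as np
import pandas as pd
from scipy import signal


def mean_hr_bpm_sketch(filename, time_input="10 sec"):
    """Estimate heart rate (BPM) over a 10 s or 20 s window of the ECG signal."""
    if time_input not in ("10 sec", "20 sec"):
        raise IOError("Invalid input. Try again (make sure to include 'sec')")
    window = float(time_input[:-4])               # "10 sec" -> 10.0, "20 sec" -> 20.0
    time_vector = extract_time_data(filename)
    if np.max(time_vector) < window:
        raise ValueError("Attempted input outside signal range")
    ind = np.where(time_vector == window)[0][0]   # index of the window boundary
    values = import_data(filename).values
    trim_norm = values[:ind, 1] - np.mean(values[:ind, 1])
    template = pd.read_csv("test_data/template.csv", header=None)
    norm_template = extract_template_data(template)
    corr = np.correlate(norm_template, trim_norm, mode="full")
    peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
    return len(peaks) / (window / 60)             # beats counted, divided by minutes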
Example #10
    def import_ref(self):
        file_name = "spectra.inp"

        output_file = os.path.join(self.path, file_name)
        config_file = os.path.join(self.path, file_name + "import.inp")
        self.im = import_data(output_file, config_file)
        self.im.run()
        self.update()
Example #11
    def import_data(self):
        target = os.path.join(get_sim_path(),
                              "fit_data" + str(self.index) + ".inp")
        config = os.path.join(get_sim_path(),
                              "fit_import_config" + str(self.index) + ".inp")
        self.im = import_data(target, config)
        self.im.run()
        self.update()
Example #12
def mean_hr_bpm(filename):
    """module to take user input for time scale, and analyze ECG input in \
    that time scale

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns heartrate: heartrate during a specific period as a float
    :raises IOError: raised if user tries to input value not accepted by \
    program
    :raises ValueError: raised if the generally accepted values fall outside \
    of the signal time range
    """

#   time_input = input("Please input time (10 sec or 20 sec): ")
    time_input = "10 sec"
    time_vector = extract_time_data(filename)
    if np.max(time_vector) >= float(time_input[:-4]):
        if str(time_input) == "10" + " sec":
            ind = np.where(time_vector == 10)[0]
            df = import_data(filename)
            values = df.values
            trimmed = values[np.arange(0, ind), 1]
            trim_norm = trimmed - np.mean(trimmed)
            template = pd.read_csv("test_data/template.csv", header=None)
            norm_template = extract_template_data(template)
            corr = np.correlate(norm_template, trim_norm, mode="full")
            peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
            heartrate = len(peaks) / (10/60)
        elif str(time_input) == "20" + " sec":
            ind = np.where(time_vector == 20)[0]
            df = import_data(filename)
            values = df.values
            trimmed = values[np.arange(0, ind), 1]
            trim_norm = trimmed - np.mean(trimmed)
            template = pd.read_csv("test_data/template.csv", header=None)
            norm_template = extract_template_data(template)
            corr = np.correlate(norm_template, trim_norm, mode="full")
            peaks = signal.find_peaks_cwt(corr, np.arange(1, 300))
            heartrate = len(peaks) / (20/60)
        else:
            raise IOError("Invalid input. Try Again (Make sure to include "
                          "sec)")
    else:
        raise ValueError("Attempted input outside signal range")
    return heartrate
Example #13
def output_test():
    data = import_data.import_data('Align_Pixel_RGB1.csv')
    data = shuffle(data)
    train_size = 100000
    test_size = 3000

    print('--------------start split data----------------')

    data_test = data[-test_size:]
    data_test.to_csv('Align_Pixel_test.csv', index=None)
Example #14
def correlation(distance_field, data_folder):
    stations = import_data(data_folder)
    initial_detection_times = np.array(
        [stations[s].initial_time for s in stations])
    print(initial_detection_times)
    normalized_initial_detection_times = initial_detection_times / np.linalg.norm(
        initial_detection_times)
    normalized_distance_field = distance_field / np.linalg.norm(
        distance_field, axis=2, keepdims=True)
    # return np.linalg.norm(normalized_distance_field - normalized_initial_detection_times,axis=2)
    return np.tensordot(normalized_distance_field,
                        normalized_initial_detection_times,
                        axes=[2, 0])
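The `tensordot` call contracts axis 2 of the normalized distance field against axis 0 of the normalized detection-time vector, which amounts to one dot product per grid point. A small sketch of that contraction with made-up shapes (a 4x3 grid and 5 stations):

import numpy as np

distance_field = np.random.rand(4, 3, 5)      # hypothetical 4x3 grid, 5 stations
detection_times = np.random.rand(5)

norm_field = distance_field / np.linalg.norm(distance_field, axis=2, keepdims=True)
norm_times = detection_times / np.linalg.norm(detection_times)

# one cosine-similarity-like score per grid point, shape (4, 3)
scores = np.tensordot(norm_field, norm_times, axes=[2, 0])
assert scores.shape == (4, 3)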
Example #15
    def import_ref(self):
        file_name = None
        if self.notebook.tabText(self.notebook.currentIndex()).strip() == _("Absorption"):
            file_name = "alpha.omat"

        if self.notebook.tabText(self.notebook.currentIndex()).strip() == _("Refractive index"):
            file_name = "n.omat"

        if file_name is not None:
            output_file = os.path.join(self.path, file_name)
            config_file = os.path.join(self.path, file_name + "import.inp")
            self.im = import_data(output_file, config_file)
            self.im.run()
            self.update()
Example #16
def poly_model_reg():
    """
    Loops through a range of regression coefficients to find the optimum,
    then plots the error and comparison graphs.
    """

    # Collect the wine data and split it according to features (inputs) and labels (targets)
    wine_data, wine_features = import_data('winequality-red.csv')
    # inputmtx = wine_data[:, 0:11]
    # print(wine_features[0,1,2,4,6,7,9,10])
    inputmtx = wine_data[:, (0,1,2,4,6,7,9,10)]
    targets = wine_data[:, 11]

    # Reserve data for model validation
    inputmtx, targets, final_inputs, final_targets = cross_validation(inputmtx, targets, 0.1)

    # Create Folds
    num_folds = 10
    N = len(targets)
    folds = create_cv_folds(N, num_folds)

    # Set variables and then train and test the model for each fold
    reg_coeffs = np.linspace(0, 1, 51)
    degrees = np.linspace(1, 5, 5)
    errors, weights = cv_evaluation_poly_model(inputmtx, targets, folds, degrees, reg_coeffs)

    # Collect the errors for each fold and find the mean errors.
    errormean = np.zeros((len(reg_coeffs), len(degrees), 5))
    for fold in errors:
        matrix = errors[fold]
        errormean = matrix + errormean
    errormean = errormean / num_folds
    errormean = errormean[0, :, :, :]

    # Print Errors
    min_reg, min_deg = print_errors_3d_mtx(errormean, reg_coeffs, degrees)
    error_deg = errormean[0, :, :]
    display_error_graphs(error_deg, degrees, "Degree Factor", "Change in Polynomial Degrees", "deg")
    error_reg = errormean[:, 2, :]
    display_error_graphs(error_reg, reg_coeffs, r"Regression Coefficient ($\lambda$)",
                         "Change in Regression Coefficient", "reg")

    # Create the aggregate final model across all the folds:
    min_reg_index = reg_coeffs.tolist().index(min_reg)
    min_deg_index = degrees.tolist().index(min_deg)
    weightsksize = int(max(degrees) * inputmtx.shape[1])
    weightsmean = findbestweights(weights, min_reg_index, min_deg_index, weightsksize)

    test_optimised_model(min_deg, weightsmean, final_inputs, final_targets)
Example #17
def extract_time_data(filename):
    """pulls time data out of pandas data frame from ECG input

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns time: array of time values from ECG input
    """
    from import_data import import_data
    df = import_data(filename)
    values = df.values
    time = values[:, 0]
    logging.info("extract_time_data: time data found")
    logging.debug("time="+str(time))
    return time
Example #18
def extract_time_data(filename):
    """pulls time data out of pandas data frame from ECG input

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns time: array of time values from ECG input
    """
    from import_data import import_data
    df = import_data(filename)
    values = df.values
    time = values[:, 0]
    logging.info("extract_time_data: time data found")
    logging.debug("time=" + str(time))
    return time
Example #19
def SubsetSLEProjections(sample_dict):

    # SLE projection directory
    #sle_dir = "191220_emulated"
    #sle_dir = "201011_proj_TIMESERIES"
    sle_dir = "2lm_projections"

    # Initialize the output dictionary
    sle_dict = {"ice_source": [], "region": [], "year": [], "scenario-sample": [],\
       "GSAT": [], "SLE": []}

    # Loop over the required SSPs from the sample dictionary
    for this_scenario in sample_dict.keys():

        # Open this matched scenario file
        filename = os.path.join(
            sle_dir, "projections_FAIR_{0}.csv".format(this_scenario.upper()))
        this_sle_dict = import_data(filename, "FAIR")

        # Filter this data for the appropriate samples
        this_sle_dict = filter_data(this_sle_dict,
                                    "FAIR",
                                    sample=sample_dict[this_scenario],
                                    ice_source="Glaciers")

        # Append these data to the output structure
        sle_dict["ice_source"].extend(this_sle_dict["ice_source"])
        sle_dict["region"].extend(this_sle_dict["region"])
        sle_dict["year"].extend(this_sle_dict["year"])
        sle_dict["GSAT"].extend(this_sle_dict["GSAT"])
        sle_dict["SLE"].extend(this_sle_dict["SLE"])

        # Add a field called "scenario-sample" to the dictionary
        scenario_sample = [
            "{0}-{1}".format(this_scenario, x) for x in this_sle_dict["sample"]
        ]
        sle_dict["scenario-sample"].extend(scenario_sample)

    # Convert everything over into numpy arrays
    sle_dict["ice_source"] = np.array(sle_dict["ice_source"])
    sle_dict["region"] = np.array(sle_dict["region"])
    sle_dict["year"] = np.array(sle_dict["year"])
    sle_dict["GSAT"] = np.array(sle_dict["GSAT"])
    sle_dict["SLE"] = np.array(sle_dict["SLE"])
    sle_dict["scenario-sample"] = np.array(sle_dict["scenario-sample"])

    # Return the sea level projection dictionary
    return (sle_dict)
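A hedged usage sketch: `sample_dict` maps scenario names to the sample indices to keep. The scenario keys and sample ranges below are purely illustrative; the function only requires that matching "projections_FAIR_<SCENARIO>.csv" files exist under the `2lm_projections` directory:

import numpy as np

sample_dict = {
    "ssp126": np.arange(0, 100),   # hypothetical sample selection per scenario
    "ssp585": np.arange(0, 100),
}
sle_dict = SubsetSLEProjections(sample_dict)
print(sle_dict["SLE"].shape, np.unique(sle_dict["scenario-sample"])[:5])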
Example #20
def extract_voltage_data(filename):
    """pulls voltage data out of pandas data frame and normalizes values

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns norm_voltage: normalized voltage data
    """
    from import_data import import_data
    df = import_data(filename)
    values = df.values
    voltage = values[:, 1]
    norm_voltage = voltage - np.mean(voltage)
    logging.info("extract_voltage_data: norm_voltage found")
    logging.debug("norm_voltage="+str(norm_voltage))
    return norm_voltage
Example #21
def extract_voltage_data(filename):
    """pulls voltage data out of pandas data frame and normalizes values

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns norm_voltage: normalized voltage data
    """
    from import_data import import_data
    df = import_data(filename)
    values = df.values
    voltage = values[:, 1]
    norm_voltage = voltage - np.mean(voltage)
    logging.info("extract_voltage_data: norm_voltage found")
    logging.debug("norm_voltage=" + str(norm_voltage))
    return norm_voltage
Example #22
File: duration.py Project: pcg15/bme590hrm
def duration(filename):
    """module that determines the duration of the ECG input signal

    :param filename: the file name of the ECG input

    :returns duration: duration of signal as a float (sec)
    """
    import pandas as pd
    from import_data import import_data
    logging.info("duration: everything imported")
    df = import_data(filename)
    df.columns = ["time", "voltage"]
    duration = df["time"].max()
    logging.info("duration: duration found")
    logging.debug("duration="+str(duration))
    return duration
Example #23
def main():
    wine_data, wine_features = import_data('winequality-red.csv')
    inputmtx = wine_data[:, 0:10]
    targets = wine_data[:, 11]
    train_inputmtx, train_targets, test_inputmtx, test_targets = cross_validation(
        inputmtx, targets, 0.25)

    train = wine_data[0:250, :]
    target = wine_data[250:1599, :]

    N = len(wine_data[:, 11])
    print(N)

    folds = create_cv_folds(N, 2)

    print(folds)
Example #24
def duration(filename):
    """module that determines the duration of the ECG input signal

    :param filename: the file name of the ECG input

    :returns duration: duration of signal as a float (sec)
    """
    import pandas as pd
    from import_data import import_data
    logging.info("duration: everything imported")
    df = import_data(filename)
    df.columns = ["time", "voltage"]
    duration = df["time"].max()
    logging.info("duration: duration found")
    logging.debug("duration=" + str(duration))
    return duration
Example #25
def voltage_extremes(filename):
    """module to calculate the maximum and minimum lead voltages of the input \
    ECG data

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns voltage_extremes: lead voltage minimum and maximum as tuple \
    (mV)
    """
    import pandas as pd
    from import_data import import_data
    df = import_data(filename)
    df.columns = ["time", "voltage"]
    voltage_min = df["voltage"].min()
    voltage_max = df["voltage"].max()
    voltage_extremes = (voltage_min, voltage_max)
    logging.info("voltage_extremes: voltage_extremes found")
    logging.debug("voltage_extremes="+str(voltage_extremes))
    return voltage_extremes
Example #26
def voltage_extremes(filename):
    """module to calculate the maximum and minimum lead voltages of the input \
    ECG data

    :param filename: the name of a file located in the /test_data folder \
    entered as a string

    :returns voltage_extremes: lead voltage minimum and maximum as tuple \
    (mV)
    """
    import pandas as pd
    from import_data import import_data
    df = import_data(filename)
    df.columns = ["time", "voltage"]
    voltage_min = df["voltage"].min()
    voltage_max = df["voltage"].max()
    voltage_extremes = (voltage_min, voltage_max)
    logging.info("voltage_extremes: voltage_extremes found")
    logging.debug("voltage_extremes=" + str(voltage_extremes))
    return voltage_extremes
Example #27
def poly_model_reg():
    """
    Loops through a range of regression coefficients to find the optimum,
    then plots the error and comparison graphs.
    """
    wine_data, wine_features = import_data('winequality-red.csv')
    inputmtx = wine_data[:, 0:11]
    # inputmtx = wine_data[:, (0, 1, 2, 4, 6, 7, 9, 10)]
    targets = wine_data[:, 11]

    # Create Folds
    num_folds = 10
    N = len(targets)
    folds = create_cv_folds(N, num_folds)
    errormtxs, uppers, lowers, prediction_values = cv_evaluation_poly_model(
        inputmtx, targets, folds, num_folds)

    preds_arry = np.zeros(160)
    upper_arr = np.zeros(160)
    lower_arr = np.zeros(160)
    for x in range(0, prediction_values.shape[1]):
        preds_arry[x] = np.mean(prediction_values[:, x])
        upper_arr[x] = np.mean(uppers[:, x])
        lower_arr[x] = np.mean(lowers[:, x])
    errormean = np.zeros((1, 5))
    for fold in errormtxs:
        matrix = errormtxs[fold]
        errormean = matrix + errormean

    errormean = errormean / num_folds

    errormean = errormean[0, :, :]
    it_priors = np.linspace(1, 50, 50)

    display_error_graphs(errormean, it_priors, "Number of Priors",
                         "Change in Priors", "priors")

    display_bayesian_confidence_graph(preds_arry, upper_arr, lower_arr)
    min_rsme_index = np.argmin(errormean, 0)[0]
    print_error_score(errormean[min_rsme_index])
Example #28
import functions
import numpy as np
import pandas as pd
from scipy import signal
import seaborn as sns
from sklearn.preprocessing import PowerTransformer
import matplotlib.pyplot as plt
#from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib import rc
rc('font', **{'family': 'serif', 'serif': ['Roman']})
rc('text', usetex=True)
plt.rcParams.update({'font.size': 16})
plt.rcParams['text.latex.preamble'] = [r"\usepackage{amsmath}", r"\usepackage{bm}"]

no2, nox, pm10, meteo, time = import_data.import_data()

# =============================================================================
# Data
# =============================================================================
plt.figure(figsize=(10, 4))
plt.plot(
    time[np.logical_and(time >= np.datetime64('2010-04-01'),
                        time <= np.datetime64('2010-04-30'))], no2[0][[
                            np.logical_and(time >= np.datetime64('2010-04-01'),
                                           time <= np.datetime64('2010-04-30'))
                        ]], 'k')
plt.xticks(ticks=np.arange('2010-04-01',
                           '2010-04-30',
                           np.timedelta64(7, 'D'),
                           dtype='datetime64[D]'),
Example #29
                    temp = []
            print(entities_to_progress)
            # Process entities, get WORD_DIST words before and after entities
            for entities in entities_to_progress:
                start = max(sent.start, entities[0].start - word_dist)
                end = min(sent.end, entities[-1].end + word_dist)
                result = doc[start:end]
                result._.entities = entities

                sentence_results.append(result)

            article_results.append(sentence_results)

        if len(article_results) > 0:
            #print(article_results)
            results.append(article_results)

    return results


# Test
if __name__ == '__main__':
    nlp = spacy.load('nl_core_news_sm')

    data = import_data('hetongelukscraped.csv')
    articles = data['Artikel']

    mydata = articles[:10]

    results = get_location_descriptions(mydata, nlp)
Example #30
import sys

sys.path.append("python_common")
sys.path.append("mosr_back_orm")

ps = 0
try:
    db_session = create_session()
    # read the polling interval between runs
    polling_second = db_session.query(SystemPar).filter(
        SystemPar.par_code == 'polling_second').one()
    ps = int(polling_second.par_value)
    db_session.commit()

except:
    db_session.rollback()
    raise
finally:
    db_session.close()
print("连接数据库成功,轮询间隔" + str(ps))
if ps > 0:
    while True:

        #
        download_data()
        import_data()
        clean_neo4j()
        neo4j_command()
        unionFind()
        time.sleep(ps)
Example #31
File: prepare.py Project: 1024pix/open-lab
"""
Prepare train and test user IDs
Creates train.txt and test.txt
"""
from random import shuffle
from import_data import import_data, make_df

data = import_data()
df = make_df(data)

users = df['userId'].unique()
shuffle(users)
n = len(users)
TRAIN_LEN = round(0.8 * n)
train = users[:TRAIN_LEN]
TEST_LEN = 10
test = users[TRAIN_LEN:TRAIN_LEN + TEST_LEN]

with open('train.txt', 'w') as f:
    f.write('\n'.join(train))

with open('test.txt', 'w') as f:
    f.write('\n'.join(test))
Example #32
#############################################
just_pct = True  # True or False: if True, adjusts the pct using the top line
#############################################
# Optionally change std:
#############################################
std = np.array([1., 2, 1.2, 1.4, 1.9, 1.5, 2.1, 2.7, 2.5, 1.1])
#############################################
# DO NOT EDIT:
#############################################

print('----------------------------------------------------------')
print('Loading data from:', filen)
print('Writing output data to files beginning with:', filen_ut)
print('----------------------------------------------------------')
print('Running', its, 'iterations')
pct_fylker, st_tall_f, partier, fylker, hele_l = import_data(filen)
print('----------------------------------------------------------')
print('Parties:', partier)

ant_dirm_fylker = np.array(
    [9, 17, 19, 7, 7, 9, 7, 6, 4, 6, 14, 16, 4, 9, 10, 5, 9, 6, 5]) - 1
# generate st. dev matrix
ant_fylk = len(fylker)
ant_part = len(partier)
std_f = np.zeros([ant_fylk, ant_part])
for i in np.arange(ant_fylk):
    std_f[i, :] = std

ind_mdg = partier.index("MDG")
hele_l = np.asarray(hele_l).astype(float)
Example #33
File: rvm.py Project: abhven/ML-Project
"""
Authors: Mrunmayee Deshpande, Lu Gan, Bruce Huang, Abhishek Venkataraman 

"""
import timeit
from skrvm import RVC
import numpy as np
import os.path
import scipy.io

from import_data import import_data

## Set data path
parsed_data_path = 'parsed_data/'
[X, Y, valX, valY, testX, testY] = import_data(parsed_data_path)

scipy.io.savemat('train.mat', dict(X=X, Y=Y))
scipy.io.savemat('val.mat', dict(valX=valX, valY=valY))
scipy.io.savemat('test.mat', dict(testX=testX, testY=testY))

## Train a RVM
clf = RVC(verbose=True)
print(clf)
clf.fit(valX, valY)
clf.score(testX, testY)
Example #34
def get_valid_performance(in_parser,
                          out_itr,
                          evalTime=None,
                          MAX_VALUE=-99,
                          OUT_ITERATION=5):
    """ Trains the Marginal DeepPseudo model and give the validation C-index performance for random search.

    Arguments:
        - in_parser: dictionary of hyperparameters
        - out_itr: indicator of set of 5-fold cross validation datasets
        - evalTime: None or a list(e.g. [12, 60]). Evaluation times at which the validation performance is measured
        - MAX_VALUE: maximum validation value
        - OUT_ITERATION: Total number of the set of cross-validation data

    Returns:
        - the validation performance of the trained network
        - saves the trained network in the folder given by "in_parser['out_path'] + '/itr_' + str(out_itr)"
    """

    ## Define a list of continuous columns from the covariates
    continuous_columns = [
        'feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6',
        'feature7', 'feature8', 'feature9', 'feature10', 'feature11',
        'feature12'
    ]
    ## If there are categorical variables in the covariates, define a list of the categorical variables

    ## Import the attributes
    tr_data, tr_time, tr_label, y_train, va_data, va_time, va_label, y_val, te_data, te_time, te_label, y_test, num_Category, num_Event, num_evalTime, x_dim = import_data(
        out_itr,
        evalTime,
        categorical_columns=None,
        continuous_columns=continuous_columns)
    y_train1 = y_train[:, 0, :]  #pseudo values for CIF for cause 1
    y_train2 = y_train[:, 1, :]  #pseudo values for CIF for cause 2

    ## Hyper-parameters
    ACTIVATION_FN = {
        'selu': tf.nn.selu,
        'elu': tf.nn.elu,
        'tanh': tf.nn.tanh,
        'relu': tf.nn.relu
    }
    mb_size = in_parser['mb_size']
    iteration = in_parser['iteration']
    keep_prob = in_parser['keep_prob']
    lr_train = in_parser['lr_train']
    initial_W = tf.contrib.layers.xavier_initializer()

    ## Make Dictionaries
    # Input Dimensions
    input_dims = {
        'x_dim': x_dim,
        'num_Event': num_Event,
        'num_Category': num_Category,
        'num_evalTime': len(evalTime)
    }

    # NETWORK HYPER-PARMETERS
    network_settings = {
        'num_units_shared': in_parser['num_units_shared'],
        'num_layers_shared': in_parser['num_layers_shared'],
        'num_units_CS': in_parser['num_units_CS'],
        'num_layers_CS': in_parser['num_layers_CS'],
        'activation_fn': ACTIVATION_FN[in_parser['activation_fn']],
        'initial_W': initial_W
    }

    file_path_final = in_parser['out_path'] + '/itr_' + str(out_itr)

    #change parameters...
    if not os.path.exists(file_path_final + '/models/'):
        os.makedirs(file_path_final + '/models/')

    ## Use GPU
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    ## Call the Marginal DeepPseudo Model
    model = CS_Marginal_DeepPseudo_Model(sess, "CS_Marginal_DeepPseudo",
                                         input_dims, network_settings)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    max_valid = -99
    stop_flag = 0

    ### Training - Main
    print("MAIN TRAINING ...")
    print("EVALUATION TIMES: " + str(evalTime))

    avg_loss = 0
    for itr in range(iteration):
        if stop_flag > 10:  #for faster early stopping
            break
        else:
            x_mb, y1_mb, y2_mb = f_get_minibatch(
                mb_size, tr_data, y_train1, y_train2)  #get the minibatches
            DATA = (x_mb, y1_mb, y2_mb)
            _, loss_curr = model.train(DATA, keep_prob,
                                       lr_train)  #train the model
            avg_loss += loss_curr / 1000

            if (itr + 1) % 1000 == 0:
                print(
                    '|| ITR: ' + str('%04d' % (itr + 1)) + ' | Loss: ' +
                    colored(str('%.4f' %
                                (avg_loss)), 'yellow', attrs=['bold']))
                avg_loss = 0

            ### Validation based on the average C-index
            if (itr + 1) % 1000 == 0:

                ### Prediction for validation data
                pred = model.predict(va_data)

                ### Evaluation on validation data
                val_result = np.zeros([num_Event, len(evalTime)])

                for t, t_time in enumerate(evalTime):
                    eval_horizon = int(t_time)
                    if eval_horizon >= num_Category:
                        print('ERROR: evaluation horizon is out of range')
                        val_result[:, t] = -1
                    else:
                        risk = pred[:, :, t]  #risk score until evalTime
                        for k in range(num_Event):
                            val_result[k, t] = weighted_c_index(
                                tr_time, (tr_label[:, 0] == k + 1).astype(int),
                                risk[:, k], va_time,
                                (va_label[:, 0] == k + 1).astype(int),
                                eval_horizon
                            )  #weighted c-index calculation for validation data

                tmp_valid = np.mean(val_result)  #average weighted C-index

                if tmp_valid > max_valid:
                    stop_flag = 0
                    max_valid = tmp_valid
                    print('updated.... average c-index = ' + str('%.4f' %
                                                                 (tmp_valid)))

                    if max_valid > MAX_VALUE:
                        saver.save(
                            sess, file_path_final + '/models/model_itr_' +
                            str(out_itr))
                else:
                    stop_flag += 1

    return max_valid
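A hedged sketch of driving this routine for one cross-validation split. Every value in `in_parser` below is an illustrative guess; the keys are exactly those read in the function body, and `evalTime=[12, 60]` mirrors the example in the docstring:

in_parser = {
    'mb_size': 32,
    'iteration': 10000,
    'keep_prob': 0.6,
    'lr_train': 1e-4,
    'num_units_shared': 64,
    'num_layers_shared': 2,
    'num_units_CS': 32,
    'num_layers_CS': 2,
    'activation_fn': 'relu',
    'out_path': 'output/marginal_deeppseudo',   # hypothetical output directory
}

valid_cindex = get_valid_performance(in_parser, out_itr=0, evalTime=[12, 60])
print('validation C-index: %.4f' % valid_cindex)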
Example #35
    model_coefficients = np.dot(np.linalg.inv(np.array(matrix)), output)

    def model(points):
        out = 0
        for a in range(n_coefficients):
            model_term = 1
            for b in range(dimensions):
                model_term = (model_term * points[b] ** model_exponents[b, a])
            model_term = model_term * model_coefficients[a]
            out = out + model_term

        return out

    return model


# [file_name] ["plot"] [start] [end] [y_start] [y_end] or
# [file_name] [data1] ... [data_n]
if __name__ == "__main__":
    import sys
    data_outer, output_outer = import_data(sys.argv[1])
    sys_model = overfit(data_outer, output_outer)
    if sys.argv[2].lower() == "plot":
        plot(sys_model, [(int(sys.argv[3]), int(sys.argv[4])),
                         (int(sys.argv[5]), int(sys.argv[6]))])
    else:
        test_case = []
        for data_index, _ in enumerate(data_outer[0]):
            test_case.append(int(sys.argv[data_index + 2]))
        print('Prediction: %f' % sys_model(test_case))
Example #36
import import_data
import pandas as pd
from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from multiprocessing import Process, Pool
import time
import numpy as np

def svm_paralle(svm, x_train, y_train):
    svm.fit(x_train, y_train)
    print('over')
    return svm

if __name__ == '__main__':
    print('import data')
    data = import_data.import_data('Align_Pixel_RGB1.csv')
    data = shuffle(data)

    print('--------------start split data----------------')

    y = data.pop('o_label')
    x = data
    train_size=10000
    test_size=3000

    data_test = import_data.import_data('Align_Pixel_test.csv')
    y_test = data_test.pop('o_label').values
    x_test = data_test.values

    print('--------------start create model----------------')
    svm_rbf = SVC(C=10.0, kernel='rbf', degree=3, gamma=0.00001,
Example #37
File: run.py Project: thortom/T-316-GAVI
import psycopg2
import sys
import theUI
from manage_db import manage_db
from import_data import import_data


if __name__ == '__main__':
    # manage_db() initializes and manages the connection to the database
    mydb = manage_db('localhost', 'verkefni2', 'postgres', 'postgres')
    if mydb.missingData():
        # import_data() imports the data into the PostgreSQL database
        data = import_data(mydb)

    # Creates an average-ratings table from the information given, to shorten query time
    mydb.createAverageRatingsTable()
    # Starts the GUI for browsing the data
    window = theUI.loadUI(mydb)
Example #38
def test_import_data(capsys, dataset):
    import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)
    out, _ = capsys.readouterr()
    assert 'Dataset resource name: ' in out
Example #39
File: run.py Project: thortom/T-316-GAVI
import psycopg2
import sys
import theUI
from manage_db import manage_db
from import_data import import_data

if __name__ == '__main__':
    mydb = manage_db('localhost', 'verkefni3', 'postgres', 'postgres')
    importer = import_data(mydb)
    if mydb.missingData():
        print("Looking for data")
        importer.findData()
        print("Got the data")
    window = theUI.loadUI(mydb, importer)