Example #1
def estimate_phase_dev(cell, temperature):
    """
    Final estimate for the circadian phase deviation.

    Parameters
    ----------
    cell : string
        Cell condition.
    temperature : int
        Temperature condition.

    Returns
    -------
    std : float
        Standard deviation of the circadian phase progression.
    std_T : float
        Standard deviation of the periods.
    """

    ######### CORRECTION BECAUSE NOT ENOUGH TRACES AT 34°C AND 40°C #########
    print('CAUTION: parameters for temperature=None selected since there are '
          'not enough traces at 34°C and 40°C')
    temperature = None

    ##################### LOAD DATA ##################
    if cell == 'NIH3T3':
        path = "Data/NIH3T3.ALL.2017-04-04/ALL_TRACES_INFORMATION.p"
        dataClass = LoadData(path, 10000000, temperature=temperature,
                             division=False)
        (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
         ll_idx_cell_cycle_start, T_theta, T_phi) = \
                                            dataClass.load(load_annotation=True)
        std, std_T = estimate_phase_dev_from_signal(ll_peak)

    elif cell == 'U2OS':
        path = "Data/U2OS-2017-03-20/ALL_TRACES_INFORMATION_march_2017.p"
        dataClass = LoadData(path, 10000000, temperature=temperature,
                             division=True)
        (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
         ll_idx_cell_cycle_start, T_theta, T_phi) = \
                                            dataClass.load(load_annotation=True)
        std, std_T = estimate_phase_dev_from_signal(ll_peak)
        # correction for the neglected coupling, since these traces divide
        std = std * 0.65
        std_T = std_T * 0.65

    else:
        # fail fast instead of hitting a NameError on the return below
        raise ValueError("Cell type doesn't exist")

    return std, std_T
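
A minimal usage sketch for the function above (hypothetical; it assumes the Data/ pickle files and the helper imports from the surrounding module are available):

std, std_T = estimate_phase_dev('NIH3T3', None)
print('phase std:', std, 'period std:', std_T)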
Example #2
def train():
    batch_size = 10
    epochs = 50
    bestloss = 1e10
    learning_rate = 5e-4
    Trainer = VGG16Trainer().cuda()

    path = './train'
    trainLabel = getLabel(path)
    traindata = LoadData(path, Label=trainLabel)
    dataloader = DataLoader(traindata, batch_size, shuffle=True)
    valLabel = getLabel('./val')
    valdata = LoadData('./val', Label=valLabel)
    valdataloader = DataLoader(valdata, batch_size, shuffle=True)
    count = 0  # epochs since the validation loss last improved
    for epoch in range(epochs):

        # halve the learning rate after 5 epochs without improvement
        if count == 5:
            learning_rate *= 0.5
            for param_group in Trainer.optimizer.param_groups:
                param_group['lr'] = learning_rate

        # early stopping after 10 epochs without improvement
        if count == 10:
            break

        Trainer.train()
        totalloss = 0
        for i_batch, batch_data in enumerate(dataloader):
            image = batch_data['image']
            label = batch_data['label'].cuda()
            image = image.cuda().float() / 255.
            loss = Trainer.train_step(image, label)
            totalloss += loss
        print('train loss:')
        print(totalloss / len(dataloader))

        Trainer.eval()
        valloss = 0
        with torch.no_grad():
            for i_batch, batch_data in enumerate(valdataloader):
                image = batch_data['image']
                label = batch_data['label'].cuda()
                image = image.cuda().float() / 255.
                valloss += Trainer.forward(image, label)
        print('val loss:')
        valloss_a = valloss / len(valdataloader)
        print(valloss_a)
        if valloss_a < bestloss:
            bestloss = valloss_a
            print('saved')
            Trainer.save('VGG.pkl')
            count = 0
        else:
            count += 1
Example #3
def estimate_OU_par(cell, temperature, W=None, gamma_A=0.03, gamma_B=0.03):
    """
    Estimate mean and variance of OU processes given a set of conditions,
    according to which a set of traces is filtered.

    Parameters
    ----------
    cell : string
        Cell type.
    temperature : integer
        Temperature condition.
    W : list
        Waveform.
    gamma_A : float
        Regression parameter for the amplitude.
    gamma_B : float
        Regression parameter for the background.

    Returns
    -------
    The mean and standard deviations of the amplitude and the background.
    """
    ######### CORRECTION BECAUSE NOT ENOUGH TRACES AT 34°C AND 40°C #########
    print('CAUTION: parameters for temperature=None selected since there are '
          'not enough traces at 34°C and 40°C')
    temperature = None

    ##################### LOAD DATA ################
    if cell == 'NIH3T3':
        path = "Data/NIH3T3.ALL.2017-04-04/ALL_TRACES_INFORMATION.p"
        dataClass = LoadData(path,
                             10000000,
                             temperature=temperature,
                             division=False)
    elif cell == 'U2OS':
        path = "Data/U2OS-2017-03-20/ALL_TRACES_INFORMATION_march_2017.p"
        dataClass = LoadData(path,
                             10000000,
                             temperature=temperature,
                             division=True)
    else:
        raise ValueError("Cell type doesn't exist")

    try:
        (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
         ll_idx_cell_cycle_start, T_theta, T_phi) = \
                                            dataClass.load(load_annotation=True)
    except FileNotFoundError:
        # retry with a path relative to the parent directory
        dataClass.path = '../' + dataClass.path
        (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
         ll_idx_cell_cycle_start, T_theta, T_phi) = \
                                            dataClass.load(load_annotation=True)

    return estimate_OU_par_from_signal(ll_signal, W, gamma_A, gamma_B)
Example #4
def estimate_cycle_dev(cell, temperature):
    """
    Final estimate for the cell-cycle phase deviation.

    Parameters
    ----------
    cell : string
        Cell condition.
    temperature : int
        Temperature condition.

    Returns
    -------
    std : float
        Standard deviation of the cell-cycle phase progression.
    std_T : float
        Standard deviation of the periods.
    """
    ##################### LOAD DATA ##################
    if cell == 'NIH3T3':
        path = "Data/NIH3T3.ALL.2017-04-04/ALL_TRACES_INFORMATION.p"
    elif cell == 'U2OS':
        path = "Data/U2OS-2017-03-20/ALL_TRACES_INFORMATION_march_2017.p"
    else:
        raise ValueError("Cell type doesn't exist")
    dataClass = LoadData(path, 10000000, temperature=temperature,
                         division=True)
    (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
     ll_idx_cell_cycle_start, T_theta, T_phi) = \
                                            dataClass.load(load_annotation=True)
    std, std_T = estimate_phase_dev_from_div(ll_idx_cell_cycle_start)

    return std, std_T
Example #5
def MeanAndVarMapper(fileName):
    # Per-file statistics: mean, variance, and sample count
    inputData = numpy.asarray(LoadData(fileName)[0])
    mean = numpy.mean(inputData)
    var = numpy.var(inputData)
    count = len(inputData)
    return mean, var, count
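
The mapper above emits one (mean, var, count) triple per file. A hypothetical companion reducer (not part of the original snippet) could pool those triples into a global mean and variance via per-file second moments:

def MeanAndVarReducer(triples):
    # triples: list of (mean, var, count) emitted by MeanAndVarMapper
    triples = list(triples)
    total = sum(c for _, _, c in triples)
    mean = sum(m * c for m, _, c in triples) / total
    # per-file E[x^2] is var + mean^2; pool them, then recentre
    second_moment = sum((v + m * m) * c for m, v, c in triples) / total
    return mean, second_moment - mean * mean, total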
Example #6
    def predict(self):
        """
        Returns the forecasts of the run function.
        """
        self.optimization()
        with open('best_model3.pkl', 'rb') as f:
            classifier = pickle.load(f)
        predict_model = theano.function(
            inputs=[classifier.input],
            outputs=classifier.LinearRegression.y_pred)

        # We can test it on some examples from the test set
        data = LoadData(self.link)
        datasets = data.load_data()
        x_test, y_test = datasets[2]

        predicted_values = predict_model(x_test.get_value())
        fig = plt.figure()
        _ = plt.scatter(x_test.get_value(),
                        predicted_values,
                        c='red',
                        label='Predicted Values')
        _ = plt.scatter(x_test.get_value(),
                        y_test.get_value(),
                        facecolors='none',
                        edgecolors='r',
                        label='Sample Points')
        _ = plt.legend()
        #plt.show()
        return fig
Example #7
    def train_n_classes(self):
        l = LoadData()
        stopWords = l.loadStopWords()
        train_sizes = [100, 200, 300]  # size per class
        for train_size in train_sizes:
            print('Training size:',
                  math.floor(train_size * 0.75) * 5, 'Test size:',
                  math.ceil(train_size * 0.25) * 5)
            self.loadData(train_size)
            vect = TfidfVectorizer(stop_words=stopWords)
            self.train_and_test_split(0.75)
            # map class names to integer labels in order of first appearance
            classes = {}
            x = 0
            for i in self.data['class_name']:
                if i not in classes:
                    classes[i] = x
                    x += 1
            X_train = vect.fit_transform(self.train_data['data'])
            Y_train = [classes[i] for i in self.train_data['class_name']]
            X_test = vect.transform(self.test_data['data'])
            Y_test = [classes[i] for i in self.test_data['class_name']]
            nb = MultinomialNB()
            Y_pred = nb.fit(X_train, Y_train).predict(X_test)
            self.metric(Y_test, Y_pred)
            print('---------------------------------------------------')
Example #8
def main():
    """
    This is the main program of the project. It calls all functions to get
    the result and shows it to the user.
    """
    try:
        yelp = LoadData()
        user = UserChoice()
        choice = user.get_user_input()
        plots = PlotVisualization(yelp.get_data())
        h = Html()
        # Output result to html
        if choice == 'quit':
            print("Quitting...")
        elif choice == "overview":
            plots.plot_overview()
            print("Overview only.")
            h.output_to_file(False)
        else:
            plots.plot_search_results(choice)
            print('Your choice of restaurants received.')
            h.output_to_file(True)

    except ValueError:
        print("Found value error.")
        sys.exit()
    except KeyboardInterrupt:
        print("Interrupted!")
        sys.exit()
    except MemoryError:
        print("Memory Error")
        sys.exit()
Example #9
def main():
    # Data loading
    arg_parser = CSACCMArgs()
    args = arg_parser.args
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    np.random.seed(args.random_seed)
    tf.set_random_seed(args.random_seed)

    data = LoadData(args.path, args.dataset, label=args.label, sep=args.sep, append_id=True, include_id=False)
    if args.verbose > 0:
        print(args)

    # Training
    t1 = time()
    model = CSACCM(feature_num=len(data.features), feature_dims=data.feature_dims, f_vector_size=args.f_vector_size,
                   user_feature_num=len(data.user_features), item_feature_num=len(data.item_features),
                   user_num=data.user_num, item_num=data.item_num, ui_vector_size=args.ui_vector_size,
                   warm_ratio=args.warm_ratio,
                   cb_hidden_layers=eval(args.cb_hidden_layers), attention_size=args.attention_size,
                   optimizer=args.optimizer, learning_rate=args.lr, batch_size=args.batch_size, epoch=args.epoch,
                   dropout_keep=args.dropout_keep, l2_regularize=args.l2,
                   verbose=args.verbose, random_seed=args.random_seed, model_path=args.model)
    if args.load == 1:
        model.load_model()
    # model.run_debug(data.test_data)

    # train
    model.train(data.train_data, data.validation_data, data.test_data, args.load == 1)
    model.print_result(t1)

    # test
    model.load_model()
    model.predict(data.test_data)
Example #10
    def invoke_data_summary(self):
        # Load data from the pickle files
        ld = LoadData(self.my_variables.training_file,
                      self.my_variables.testing_file,
                      self.my_variables.validation_file)
        train_test_valid_data = ld.get_data()

        #########################################################################################################
        self.x_train, self.y_train = train_test_valid_data[
            0], train_test_valid_data[1]
        self.x_test, self.y_test = train_test_valid_data[
            2], train_test_valid_data[3]
        self.x_valid, self.y_valid = train_test_valid_data[
            4], train_test_valid_data[5]

        #########################################################################################################
        # Basic Summary of dataset
        self.bs.summary_report(self.x_train, self.y_train, self.x_test,
                               self.y_test, self.x_valid, self.y_valid)

        #########################################################################################################
        # Exploratory visualization for train data
        self.vz.bar_chart(self.y_train, "train_data")
        # Exploratory visualization for test data
        self.vz.bar_chart(self.y_test, "test_data")
        # Exploratory visualization for validation data
        self.vz.bar_chart(self.y_valid, "validation_data")

        #########################################################################################################
        self.vz.read_sign_names_from_csv(self.my_variables)
        self.vz.display_random_images(self.x_train, self.y_train,
                                      self.my_variables, "train")
Example #11
    def __init__(self, tickers, start='2014-01-01', end='2018-01-01', interval='1d', n_series=20, T_pred=10, n_cols=30, n_rows=30, T_space=10, train=True):

        self.folder = './' + ''.join(tickers) + '_start' + start + '_end' + end + '_int' + interval + \
                      '/case' + str(n_series) + '_' + str(T_pred) + '_' + str(n_cols) + '_' + str(n_rows) + '_' + str(T_space)

        try:
            self.original = np.load(self.folder + '/original.npy')
            if train:
                self.x = np.load(self.folder + '/Xtrain.npy')
                self.y = np.load(self.folder + '/Ytrain.npy')
            else:
                self.x = np.load(self.folder + '/Xtest.npy')
                self.y = np.load(self.folder + '/Ytest.npy')
        except FileNotFoundError:
            # cached arrays are missing: rebuild them from the raw data
            ld = LoadData(tickers, start, end, interval)
            try:
                ld.unprocessed = pd.read_csv('./' + ''.join(tickers) + '_start' + start + '_end' + end + '_int' + interval + '/UnprocessedData.csv')
            except FileNotFoundError:
                print('DOWNLOADING DATA')
                ld.download()
            print('PROCESSING DATA')
            ld.process(n_series, T_pred, n_cols, n_rows, T_space, plot=True)
            ld.cut_and_shuffle()

            if train:
                self.x = ld.Xtrain
                self.y = ld.Ytrain
            else:
                self.x = ld.Xtest
                self.y = ld.Ytest
            self.original = ld.original

        # Shape of X: (Number of datasamples, Number of tickers, Number of rows, Number of columns)
        # Shape of Y: (Number of datasamples, Number of tickers)
        self.len = self.x.shape[0]
Example #12
def main():
    # Data loading
    arg_parser = BaseArgs()
    args = arg_parser.args

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    np.random.seed(args.random_seed)
    tf.set_random_seed(args.random_seed)

    data = LoadData(args.path, args.dataset, label=args.label, append_id=False, include_id=True)
    if args.verbose > 0:
        print(args)

    # Training
    t1 = time()
    model = BaseModel(feature_num=len(data.features), optimizer=args.optimizer,
                      learning_rate=args.lr, batch_size=args.batch_size, epoch=args.epoch,
                      dropout_keep=args.dropout_keep, l2_regularize=args.l2,
                      verbose=args.verbose, random_seed=args.random_seed, model_path=args.model)
    if args.load == 1:
        model.load_model()
    # model.run_debug(data.test_data)

    # train
    model.train(data.train_data, data.validation_data, data.test_data, args.load == 1)
    model.print_result(t1)

    # test
    model.load_model()
    model.predict(data.test_data)
Example #13
def feature_extraction_NFM(X, y_binary, i_Pos_sample_set, i_Neg_sample_set,
                           args):

    # the labeled data will come from both pos and neg
    labeled_idx = set.union(i_Pos_sample_set, i_Neg_sample_set)
    X_label = [X[i] for i in labeled_idx]
    y_label = [y_binary[i] for i in labeled_idx]

    X_train = np.asarray(X_label)
    Y_train = np.asarray(y_label)
    X_validation = np.asarray(X_label)
    Y_validation = np.asarray(y_label)
    # use the whole dataset for test, so that new features are produced
    # for every sample
    X_test = copy.deepcopy(X)
    Y_test = copy.deepcopy(y_binary)

    data = LoadData(args.loss_type, X_train, Y_train, X_validation,
                    Y_validation, X_test, Y_test)

    # Training
    model = NeuralFM(data.features_M, args.hidden_factor, args.layers,
                     args.loss_type, args.pretrain, args.epoch,
                     args.batch_size, args.lr, args.lamda, args.keep_prob,
                     args.optimizer, args.batch_norm, args.activation_function,
                     args.verbose, args.early_stop)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    features = model.get_deep_feature(data.Test_data)
    # alternatively: model.get_bi_feature(data.Test_data)
    return features
Example #14
def show_no_temperature_difference():
    """
    Plot the estimate of the phase at different temperatures.
    """
    l_var = []
    l_var_var = []
    l_temp = [34, 37, 40]
    for temperature in l_temp:
        path = "Data/NIH3T3.ALL.2017-04-04/ALL_TRACES_INFORMATION.p"
        dataClass = LoadData(path, 10000000, temperature=temperature,
                             division=False)
        (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
         ll_idx_cell_cycle_start, T_theta, T_phi) = \
                                            dataClass.load(load_annotation=True)
        var, var_var = compute_phase_variance_with_confidence(ll_peak)
        l_var.append(var)
        l_var_var.append(var_var)

    plt.errorbar(l_temp, l_var, yerr=l_var_var, fmt='o')
    plt.xlim([33, 41])
    plt.xlabel("Temperature")
    plt.ylabel("Phase diffusion variance mean and deviation")
    plt.savefig('Results/RawData/var_diffusion.pdf')
    plt.show()
    plt.close()
Example #15
def compute_likelihood_sigma():
    """
    Compute and plot the likelihood of the phase diffusion parameter,
    depending on the temperature.
    """
    l_T = [34, 37, 40]
    l_likelihood_T = []
    mean_IG = 24
    domain_sigma = np.linspace(0.05, 0.3, 100)

    for T in l_T:

        path = "Data/NIH3T3.ALL.2017-04-04/ALL_TRACES_INFORMATION.p"
        dataClass = LoadData(path, 10000000, temperature=T, division=False)
        (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
         ll_idx_cell_cycle_start, T_theta, T_phi) = \
                                            dataClass.load(load_annotation=True)

        l_T_clock = []
        for l_peak in ll_peak:
            l_idx_peak = [idx for idx, i in enumerate(l_peak) if i == 1]
            for t_peak_1, t_peak_2 in zip(l_idx_peak[:-1], l_idx_peak[1:]):
                # use a dedicated name to avoid shadowing the temperature T
                period = (t_peak_2 - t_peak_1) / 2
                if period < 12 or period > 38:
                    # outlier: double or missing peak annotation
                    pass
                else:
                    l_T_clock.append(period)

        l_likelihood = []
        for sigma_theta in domain_sigma:
            lpx = np.log(1 / sigma_theta)
            for period in l_T_clock:
                lpx = lpx + np.log(invgauss(period, mean_IG,
                                            4 * np.pi**2 / sigma_theta**2))
            l_likelihood.append(lpx / len(l_T_clock))
        l_likelihood_T.append(l_likelihood)
        l_likelihood_T.append(l_likelihood)


    plt.plot(domain_sigma, l_likelihood_T[0], c='red', label='34')
    plt.plot(domain_sigma, l_likelihood_T[1], c='blue', label='37')
    plt.plot(domain_sigma, l_likelihood_T[2], c='orange', label='40')
    plt.axvline(domain_sigma[np.argmax(l_likelihood_T[0])], c='red')
    plt.axvline(domain_sigma[np.argmax(l_likelihood_T[1])], c='blue')
    plt.axvline(domain_sigma[np.argmax(l_likelihood_T[2])], c='orange')


    plt.ylabel(r'$\log(L(\sigma_\theta))$')
    plt.xlabel(r'$\sigma_\theta$')
    plt.legend()
    plt.savefig('Results/RawData/likelihood_sigma_theta.pdf')
    plt.show()
    plt.close()
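
Note that invgauss is not defined in this snippet. A plausible sketch of such a helper, assuming it is the inverse-Gaussian density with the third argument as the shape parameter lambda, would be:

def invgauss(x, mu, lambd):
    # inverse-Gaussian pdf:
    # f(x; mu, lambda) = sqrt(lambda / (2*pi*x^3))
    #                    * exp(-lambda * (x - mu)^2 / (2 * mu^2 * x))
    return np.sqrt(lambd / (2 * np.pi * x**3)) * \
        np.exp(-lambd * (x - mu)**2 / (2 * mu**2 * x))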
Example #16
    def __init__(self):

        self.rawdata = LoadData('./ex1data1.txt')
        self.X, self.y, self.batch_size = self.rawdata.loadTXT()
        self.theta = np.array([[0.], [0.]])
        self.alpha = 0.01
        self.costlst = []
        self.thetalst = []

        print(self.theta.shape)
        print(self.batch_size)
        print(np.sum(self.X[:, 1]))
        print(np.sum(self.y))
Example #17
    def train(self):
        l = LoadData()
        stopWords = l.loadStopWords()
        self.loadDataCSV('bbc-text.csv')
        vect = TfidfVectorizer(stop_words=stopWords)
        self.train_and_test_split(0.75)
        X_train = vect.fit_transform(self.train_data['data'])
        Y_train = self.train_data['class_name']
        X_test = vect.transform(self.test_data['data'])
        Y_test = self.test_data['class_name']
        nb = MultinomialNB()
        Y_pred = nb.fit(X_train, Y_train).predict(X_test)
        self.metric(Y_test, Y_pred)
Example #18
def load_model_and_predict(data=None):

    transaction_classifier = TransactionClassifier()
    transaction_classifier.load_model()

    if data is None:
        data = LoadData(x_input_features=[5, 6, 7])
        data.load_processed_data()

    val_data = data.load_validation_data()
    test_data = data.load_test_data()

    print(transaction_classifier.get_confusion_matrix(val_data))
    print(transaction_classifier.get_confusion_matrix(test_data))
Example #19
    def __init__(self):

        self.rawdata = LoadData('./ex2data1.txt')
        self.X, self.y, self.batch_size, rawdata = self.rawdata.loadTXT()
        self.theta = np.array([[0.], [0.], [0.]])
        self.alpha = 0.01
        self.costlst = []
        self.thetalst = []
        self.rawdata_p = rawdata[np.where(rawdata[:, 2] == 1)]
        self.rawdata_n = rawdata[np.where(rawdata[:, 2] == 0)]
        self.y = self.y[0]
Example #20
def plot_hist_periods(cell, temperature, division):
    """
    Given a cell condition, compute and plot a histogram of periods.

    Parameters
    ----------
    cell : string
        Cell condition.
    temperature : int
        Temperature condition.
    division : bool
        Whether dividing traces are selected.
    """
    ##################### LOAD DATA ##################
    if cell == 'NIH3T3':
        path = "../Data/NIH3T3.ALL.2017-04-04/ALL_TRACES_INFORMATION.p"
    else:
        path = "../Data/U2OS-2017-03-20/ALL_TRACES_INFORMATION_march_2017.p"
    dataClass = LoadData(path,
                         10000000,
                         temperature=temperature,
                         division=division)
    (ll_area, ll_signal, ll_nan_circadian_factor, ll_obs_phi, ll_peak,
     ll_idx_cell_cycle_start, T_theta, T_phi) \
                                        = dataClass.load(load_annotation=True)

    if division:
        ##################### COMPUTE CELL CYCLE DISTRIBUTION ##################
        l_T_cell_cycle = []
        for l_div_index in ll_idx_cell_cycle_start:
            for t1, t2 in zip(l_div_index[:-1], l_div_index[1:]):
                l_T_cell_cycle.append((t2 - t1) / 2)
    ##################### COMPUTE CIRCADIAN CLOCK DISTRIBUTION #################
    l_T_clock = []
    for l_peak in ll_peak:
        l_idx_peak = [idx for idx, i in enumerate(l_peak) if i == 1]
        for t_peak_1, t_peak_2 in zip(l_idx_peak[:-1], l_idx_peak[1:]):
            l_T_clock.append((t_peak_2 - t_peak_1) / 2)
    ##################### PLOT BOTH DISTRIBUTIONS ##################
    bins = np.linspace(8, 38, 40)
    if division:
        plt.hist(l_T_cell_cycle, bins, alpha=0.5, label='cell-cycle')
    plt.hist(l_T_clock, bins, alpha=0.5, label='clock')
    plt.legend(loc='upper right')
    if division:
        plt.savefig('../Results/RawData/Distributions_div_'+str(temperature)\
                    +"_"+cell+'.pdf', bbox_inches='tight')
    else:
        plt.savefig('../Results/RawData/Distributions_nodiv_'+str(temperature)\
                    +"_"+cell+'.pdf', bbox_inches='tight')
    plt.close()
Example #21
    def __init__(self):

        self.headerFiles = {
            "abslogout": "conf/wifi_abslogout_header",
            "login": "******",
            "auth": "conf/wifi_auth_header",
            "logout": "conf/wifi_logout_header"
        }
        self.paramFiles = {
            "abslogout": "conf/wifi_abslogout_params",
            "login": "******",
            "auth": "conf/wifi_auth_params",
            "logout": "conf/wifi_logout_params"
        }

        self.outFiles = {
            "abslogout": "out/abslogout.txt",
            "login": "******",
            "auth": "out/auth.txt",
            "logout": "out/logout.txt"
        }

        ld = LoadData()
        urls = ld.loadUrls('conf/urls')
        self.urls = {
            "abslogout": urls['abslogout'],
            "login": urls['login'],
            "logout": urls['logout'],
            "auth": urls['auth']
        }

        edatas = ld.loadParams('conf/email', split="=")
        self.eml = MyEmail()
        self.eml.setUser(edatas['msg_from'], edatas['msg_to'], edatas['passwd'])

        # self.absLogoutUrl = "http://ipgw.neu.edu.cn/include/auth_action.php"
        # self.loginUrl = "http://ipgw.neu.edu.cn/srun_portal_pc.php?ac_id=1&"
        # self.logoutUrl = "http://ipgw.neu.edu.cn/srun_portal_pc.php?ac_id=1&"
        # self.authUrl = "http://ipgw.neu.edu.cn/include/auth_action.php?"

        self.ld = ld
        if not os.path.exists("out"):
            os.mkdir('out')
        if not os.path.exists('conf'):
            print("配置文件损坏,无法运行,请自行查看代码修复!很容易")
Example #22
    def optimize(self):
        """
        Doing the actual optimization
        """
        batch_size = self.batch_size
        finetune_lr = self.finetune_lr
        n_epochs = self.training_epochs

        data = LoadData(self.link)
        datasets = data.load_data()
        train_set_x = datasets[0][0]
        n_train_batches = train_set_x.get_value(
            borrow=True).shape[0] // batch_size

        # numpy random generator
        numpy_rng = numpy.random.RandomState(123)

        # construct the Deep Belief Network
        dbn = DBN(numpy_rng=numpy_rng,
                  output_layer=LinearRegression,
                  n_ins=1,
                  hidden_layers_sizes=[3, 3],
                  n_outs=1)

        # Pretraining
        pretraining_fns = dbn.pretraining_functions(
            train_set_x=train_set_x,
            batch_size=batch_size,
            k=k)  # k (number of Gibbs steps) assumed defined in scope
        self.__pretraining(dbn.n_layers, pretraining_fns, n_train_batches)

        # Backpropagation
        train_fn, validate_model, test_model = dbn.build_finetune_functions(
            datasets=datasets,
            batch_size=batch_size,
            learning_rate=finetune_lr)
        models = (train_fn, validate_model, test_model)
        finetuning = Optimization(batch_size=batch_size, n_epochs=n_epochs)
        finetuning.Backpropagation(models, datasets)

        test = theano.function(inputs=[dbn.x], outputs=dbn.output_layer.y_pred)
        prediction = test(datasets[2][0].get_value())
        return dbn
Example #23
    def loadData(self):
        l = LoadData()
        data_cluster = l.loadData('wine-clustering.csv')
        self.std_scaler = StandardScaler()
        self.min_max_scaler = MinMaxScaler()
        data_cluster[data_cluster.columns] = self.std_scaler.fit_transform(
            data_cluster)
        data_cluster[data_cluster.columns] = self.min_max_scaler.fit_transform(
            data_cluster)
        # PCA projection onto two components
        self.pca_2 = PCA(2)
        self.pca_2_result = self.pca_2.fit_transform(data_cluster)
        self.data = data_cluster
Example #24
def train_and_get_metrics():

    print('LOADING DATA')
    # Load Data
    data = LoadData(x_input_features=[5, 6, 7])
    data.load_processed_data()

    train_data = data.load_train_data()
    validation_data = data.load_validation_data()
    test_data = data.load_test_data()

    print('TRAINING CLASSIFIER')
    # Run Model
    transaction_classifier = TransactionClassifier()
    transaction_classifier.train(train_data)

    print('TESTING CLASSIFIER')
    train_results = transaction_classifier.test(train_data)
    val_results = transaction_classifier.test(validation_data)
    test_results = transaction_classifier.test(test_data)

    print('STATS:')
    print('------------ TRAIN SET -------------')
    print('LENGTH =', len(train_data[0]))
    print('Metrics:\n', train_results)

    print('------------ VALIDATION SET -------------')
    print('LENGTH =', len(validation_data[0]))
    print('Metrics:\n', val_results)

    print('------------ TEST SET -------------')
    print('LENGTH =', len(test_data[0]))
    print('Metrics:\n', test_results)

    print('++++++++++++++++++++++++++++++++++++++++')

    print('SAVING CLASSIFIER')
    transaction_classifier.save_model()
Example #25
    def train_1_class(self):
        l = LoadData()
        stopWords = l.loadStopWords()
        train_sizes = [100, 200]  # size per class
        for train_size in train_sizes:
            print('Training size:',
                  math.floor(train_size * 0.75) * 2, 'Test size:',
                  math.ceil(train_size * 0.25) * 2)
            self.loadData(train_size)
            vect = TfidfVectorizer(stop_words=stopWords)

            # balance classes
            temp_class = self.data['class_name'][train_size:]
            temp_data = self.data['data'][train_size:]
            idx = random.choices(range(len(temp_class)), k=train_size)
            temp_class = [temp_class[i] for i in idx]
            temp_data = [temp_data[i] for i in idx]
            del self.data['data'][train_size:]
            del self.data['class_name'][train_size:]
            self.data['class_name'].extend(temp_class)
            self.data['data'].extend(temp_data)

            self.train_and_test_split(0.75)
            X_train = vect.fit_transform(self.train_data['data'])
            Y_train = [
                1 if i == 'business' else 0
                for i in self.train_data['class_name']
            ]
            X_test = vect.transform(self.test_data['data'])
            Y_test = [
                1 if i == 'business' else 0
                for i in self.test_data['class_name']
            ]
            nb = MultinomialNB()
            Y_pred = nb.fit(X_train, Y_train).predict(X_test)
            self.metric(Y_test, Y_pred)
            print('---------------------------------------------------')
Example #26
    def generate_report(self, input_file_name, output_file_name, athlete_name,
                        is_running_file, date):
        """
        :param input_file_name: Name of the csv file containing data
        :param output_file_name: Name of the tex file that is generated
        :param athlete_name: Name of the athlete separated by an underscore
        :param is_running_file: Boolean, either true or false
        :param date: The date associated with the file in format 'yyyy_mm_dd'
        :return: Nothing

        Generate the tex file that will later be compiled into a pdf.
        """
        data = LoadData().load_csv(input_file_name)
        stats = StatisticsAndPlots()
        self.build_tex(output_file_name, athlete_name, is_running_file,
                       ("%.2f" % stats.get_average_speed(data)),
                       ("%.2f" % stats.get_average_heart_rate(data)),
                       ("%.2f" % stats.get_average_cadence(data)),
                       (None if is_running_file else "%.2f" %
                        stats.get_average_power(data, is_running_file)),
                       stats.make_pace_histogram(data, date, athlete_name),
                       stats.make_zone_histogram(data, date, athlete_name),
                       stats.make_raw_plot(data, date, athlete_name), date)
        self.number_of_files_created += 1
Example #27
filename2 = "/workspace/data/labels1.txt"
counter = 1

start = time.time()

# 'filename' is assumed to be defined earlier in this (truncated) script
filename = filename[:-4 - len(str(counter - 1))] + str(counter) + filename[-4:]

data = np.memmap('newdata.array',
                 dtype=np.float64,
                 mode='w+',
                 shape=(1300000, 100, 40, 3))

print("[INFO] Loading first file... ")

with open(filename, "r") as file:
    data_temp = LoadData(file)

length = data_temp.shape[0]
data[0:length] = data_temp
counter = counter + 1

end = time.time()
elapsed = end - start

print("[INFO] Finished loading first file, elapsed time: " + str(elapsed))
print("[INFO] data shape: " + str(data.shape))
print("[INFO] length: " + str(length))

while True:
    print("[INFO] Loading file " + str(counter) + " ...")
    start = time.time()
Example #28
import sys
sys.path.append("../Basic Functions")
import AdaBoost
from LoadData import LoadData

if __name__ == '__main__':
    trainingDataArray, trainingLabelList = LoadData("HorseColicTraining.txt")
    classifierList, totalPredictValue = AdaBoost.AdaboostTrain(
        trainingDataArray, trainingLabelList, 10)
    testDataArray, testLabelList = LoadData("HorseColicTest.txt")
    result = AdaBoost.AdaClassify(testDataArray, classifierList)
    errorList = [
        i for i in range(len(testLabelList)) if testLabelList[i] != result[i]
    ]
    print(errorList)

    AUC = AdaBoost.PlotROC(trainingLabelList, totalPredictValue)
    print(AUC)
Example #29
    def load_data_from_pickle_file(self):
        (self.grid, self.entry_coords, self.exit_coords, self.solution_path,
         self.path_value, self.obstacle_value) = \
            LoadData(self.filename).load_data_from_pickle_file()
        if self.display_maze:
            self.show_maze()
Example #30
    def load_grid_from_csv(self):
        self.grid = LoadData(self.filename).load_grid_from_csv()
        if self.display_maze:
            self.show_maze()