Code example #1
def load_data(do_plots=False):
    train_df = pd.read_csv('train.csv.gz', compression='gzip')
    test_df = pd.read_csv('test.csv.gz', compression='gzip')
    submit_df = pd.read_csv('sampleSubmission.csv.gz', compression='gzip')
    bid_df = pd.read_csv('bid_reduced.csv.gz', compression='gzip')

    train_df = train_df.merge(bid_df, on='bidder_id', how='inner')
    test_df = test_df.merge(bid_df, on='bidder_id', how='inner')

    train_df = clean_data(train_df)
    test_df = clean_data(test_df)

    if do_plots:
        from plot_data import plot_data
        plot_data(train_df, prefix='html_train')
        plot_data(test_df, prefix='html_test')

    print(train_df.dtypes)
    print(test_df.dtypes)
    print(submit_df.dtypes)

    xtrain = train_df.drop(labels=['outcome', 'bidder_id'], axis=1).values
    ytrain = train_df['outcome'].values
    xtest = test_df.drop(labels=['bidder_id'], axis=1).values
    ytest = submit_df
    y_id = list(test_df['bidder_id'])

    return xtrain, ytrain, xtest, ytest, y_id
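
The clean_data helper these load_data variants call is never shown in this listing. A minimal sketch of what such a helper might do, assuming the goal is a fully numeric matrix for .values (hypothetical; each project's real version will differ):

# hypothetical sketch of the unshown clean_data helper; df is a pandas DataFrame
def clean_data(df):
    for col in df.columns:
        if df[col].dtype == object:
            # encode string columns as integer category codes
            df[col] = df[col].astype('category').cat.codes
        elif df[col].isnull().any():
            # fill numeric gaps with the column median
            df[col] = df[col].fillna(df[col].median())
    return df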
Code example #2
def subreddit_graphs(data, subreddit_name, which_graph):
    plots = [(2, 2, i) for i in range(1, 5)]
    if which_graph == 'line graph':
        plt.figure(num=None,
                   figsize=(16, 6),
                   dpi=80,
                   facecolor='w',
                   edgecolor='k')
        for i in range(len(data)):
            plt.subplot(plots[i][0], plots[i][1], plots[i][2])
            plot_data(data[i][subreddit_name],
                      subreddit_name,
                      " ",
                      compiling=True)
            plt.title(graph_type[i])
        plt.suptitle(subreddit_name, y=1)
    elif which_graph == 'table':
        _, axes = plt.subplots(2, 2)
        # Sub_Rates, Comment_Rates, Sub_Bi, Comment_Bi
        pos = ((-1.2, 1.2), (0, 1.2), (-1.2, 0), (0, 0))
        for i in range(len(data)):
            average = table(data[i][subreddit_name], subreddit_name,
                            graph_type[i], axes, i)
            plt.text(
                pos[i][0], pos[i][1],
                "Subreddit " + subreddit_name + " has an average rate of " +
                str(average)[:6] + " " + graph_type[i] + " per 2 hours.")
    plt.show()
Code example #3
File: main.py  Project: sarinaxie/K9-trainer
def compare_errors(k_vals, input_data_file):
    ## read in the input data
    initial_data = create_data(input_data_file)

    ## create plots of the data (this should save the images within the current
    ## directory)
    plot_data(initial_data)

    ## integerize the data labels
    integerized_data, label_dict = integerize_labels(initial_data)

    ## split the data into train and test
    train, test = split(integerized_data)

    ## compute the errors
    errors = {}
    for k in k_vals:
        predicted_labels = knn(train, test, k)
        error_rate = calculate_error_rate(predicted_labels, test)
        errors[k] = error_rate

    ## BONUS: weighting
    for k in k_vals:
        weighted_predicted_labels = weighted_knn(train, test, k)
        weighted_error_rate = calculate_error_rate(weighted_predicted_labels,
                                                   test)
        print("Weighted error value for k = %d was %f" %
              (k, weighted_error_rate))

    return errors
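
calculate_error_rate is referenced but not shown; a minimal sketch, assuming each test row carries its true label in the last position (signature inferred from the call site, not from the original project):

def calculate_error_rate(predicted_labels, test):
    # fraction of test rows whose prediction differs from the true label
    wrong = sum(1 for pred, row in zip(predicted_labels, test)
                if pred != row[-1])
    return wrong / float(len(test))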
Code example #4
def load_data(do_drop_list=False, do_plots=False):
    train_df = pd.read_csv('train.csv.gz', compression='gzip')
    test_df = pd.read_csv('test.csv.gz', compression='gzip')
    submit_df = pd.read_csv('sampleSubmission.csv.gz', compression='gzip')

    train_df = clean_data(train_df)
    test_df = clean_data(test_df)

    print(train_df.columns)
    print(test_df['City Group'].describe())

#    print train_df['revenue'].describe()
#    for col in test_df.columns:
#        print '\'%s\': [%d, %d, 0],' % (col, min(train_df[col].min(), test_df[col].min()), \
#                   max(train_df[col].max(), test_df[col].max()))

    if do_plots:
        from plot_data import plot_data
        plot_data(train_df, prefix='html_train')
        plot_data(test_df, prefix='html_test')

    ### wanted to keep track of feature_list
    feature_list = train_df.drop(['Id', 'revenue'], axis=1).columns
    print('features', list(feature_list))


    xtrain = train_df.drop(labels=['Id', 'revenue'], axis=1).values
    ytrain = train_df['revenue'].values
    xtest = test_df.drop(labels=['Id'], axis=1).values
    ytest = submit_df
    return xtrain, ytrain, xtest, ytest, feature_list
Code example #5
def load_data(do_drop_list=False, do_plots=False):
    train_df = pd.read_csv('train_full.csv.gz', compression='gzip')
    test_df = pd.read_csv('test_full.csv.gz', compression='gzip')
    submit_df = pd.read_csv('sample_submit_full.csv.gz', compression='gzip')

    train_df = clean_data(train_df)
    test_df = clean_data(test_df)

#    print train_df.columns
#    print test_df.columns
    #print submit_df.columns

#    for col in train_df.columns:
#        if any(train_df[col].isnull()):
#            print col, train_df[col].dtype

    if do_plots:
        from plot_data import plot_data
        plot_data(train_df, prefix='html_train')
        plot_data(test_df, prefix='html_test')

    unitlist = ['units%d' % (idx+1) for idx in range(111)]

    ### wanted to keep track of feature_list
    feature_list = train_df.drop(['store_nbr', 'station_nbr']+unitlist, axis=1).columns
#    print 'features', list(feature_list)

    xtrain = train_df.drop(labels=['store_nbr', 'station_nbr']+unitlist, axis=1).values
    ytrain = train_df[unitlist].values
    xtest = test_df.drop(labels=['store_nbr', 'station_nbr'], axis=1).values
    ytest = submit_df

#    xtrain, ytrain, xtest, ytest, feature_list = 5*[None]
    return xtrain, ytrain, xtest, ytest, feature_list
Code example #6
def count_steps(data):
    '''
    This function counts the number of steps in data and returns the number of steps
    '''
    print('count_steps')
    plot_data(data)
    mag = vector_magnitude(data)
    plot_mag(mag)
    average = moving_average(data, 100)
    plot_mag(average)
    num_steps = 0
    found = False
    stepArray = []
    for i, x in enumerate(average):
        if 4 <= x <= 4.03:
            if not found:
                num_steps += 1
                stepArray.append(i)
                found = True
        else:
            # reset once the signal leaves the threshold band so each
            # crossing is counted as a single step
            found = False

    plot_steps(average, stepArray)

    return num_steps
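
Several of the step-counting examples here call vector_magnitude and moving_average without showing them. A plausible minimal sketch (signatures assumed from the call sites; the original assignments' helpers may differ):

import numpy as np

def vector_magnitude(data):
    # Euclidean magnitude of each (x, y, z) accelerometer sample
    arr = np.asarray(data, dtype=float)
    return np.sqrt((arr ** 2).sum(axis=1))

def moving_average(values, window):
    # boxcar smoothing; output is window - 1 samples shorter than the input
    kernel = np.ones(window) / window
    return np.convolve(np.asarray(values, dtype=float), kernel, mode='valid')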
Code example #7
def load_data(do_plots=False):
    train_df = pd.read_csv('train_full.csv.gz', compression='gzip',
                           low_memory=False)
    test_df = pd.read_csv('test_full.csv.gz', compression='gzip',
                          low_memory=False)
    submit_df = pd.read_csv('sampleSubmission.csv.gz', compression='gzip')

    train_df = clean_data(train_df)
    test_df = clean_data(test_df)

    print(submit_df.dtypes)

    for col in test_df.columns:
        if (test_df[col].isnull()).sum() > 0:
            print(col, test_df[col].dtype)
#    print(sorted(train_df['is_sat_trap'].unique()))
#    print(sorted(test_df['Species'].unique()))
    if do_plots:
        from plot_data import plot_data
        plot_data(train_df, prefix='train_html')
        plot_data(test_df, prefix='test_html')

    features = train_df.drop(labels=['NumMosquitos', 'WnvPresent'],
                             axis=1).columns

    xtrain = train_df.drop(labels=['NumMosquitos', 'WnvPresent'],
                           axis=1).values
    ytrain = train_df[['NumMosquitos', 'WnvPresent']].values
    xtest = test_df.drop(labels=['Id'], axis=1).values
    ytest = submit_df
    return xtrain, ytrain, xtest, ytest, features
Code example #8
File: lab7.py  Project: ALEXKIRNAS/KPI-Semester-4
def compare(data):
    """
    Compares two greedy algorithms for the problem of minimizing the
    weighted sum of job completion times, which use different criteria
    for ordering the jobs (see the functions schedule_dif and
    schedule_ratio).
    The comparison is run on problem instances of different sizes.
    Parameters:
        data - input array of jobs (for details, see the comments in load_data)
    """
    data_plot = {'dif': {}, 'ratio': {}}

    # parameters of the experiment
    n_begin = 10  # initial problem size
    n_end = len(data)  # final problem size
    n_step = 10  # size step

    for n in range(n_begin, n_end + 1, n_step):
        sum_1 = schedule_dif(data[:n + 1])
        sum_2 = schedule_ratio(data[:n + 1])
        print("N:", n, "Sum_dif:", sum_1, "Sum_ratio:", sum_2)

        data_plot['dif'][n] = sum_1
        data_plot['ratio'][n] = sum_2

    plot_data(data_plot, oneplot=True, show_markers=False)
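
schedule_ratio, named in the docstring, is the classic greedy rule for this problem: order jobs by weight/length ratio and sum the weighted completion times. A hedged sketch, assuming jobs are (weight, length) pairs (the lab's actual representation is not shown):

def schedule_ratio(jobs):
    # sorting by weight/length descending is the optimal ordering for
    # minimizing the weighted sum of completion times
    ordered = sorted(jobs, key=lambda job: job[0] / float(job[1]), reverse=True)
    total, finish = 0, 0
    for weight, length in ordered:
        finish += length
        total += weight * finish
    return total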
Code example #9
    def _plot_chart_button_fired(self):
        """Method to plot the selected data"""
        # Read the TableEditor to see what the user has chosen to plot
        data_to_plot = []
        for i in range(len(self.correlpairs)):
            if i == len(self.correlpairs) - 1:
                pair_name = ['BASKET CORREL', 'BASKET CORREL']
            else:
                pair_name = self.correlpairs[i].correl_pair.split('-')

            # append one entry per checked time window for this pair
            for j in range(5):
                if getattr(self.correlpairs[i], 'time_window_%d' % (j + 1)):
                    data_to_plot.append(
                        (pair_name[0].strip(), pair_name[1].strip(),
                         self.time_windows_input[0][j]))

        # Plot
        pl.plot_data(self.corr_data[0], self.corr_data[1], data_to_plot)
Code example #10
def compare_merge_impr_and_hybrid():
    """
    Compares two sorting methods: merge sort and a hybrid sort based on
    insertion and merging.
    For details, see the function compare_ins_and_merge().
    """
    # parameters of the experiment
    repeats = 5  # number of runs per problem size
    n_begin = 1000  # initial problem size
    n_end = 10000  # final problem size
    n_step = 300  # size step

    types = ["random"]
    data_plot = {'random': {'merge_impr': {}, 'hybrid': {}}}
    data_plot_2 = {'ratio': {'merge_impr/hybrid': {}}}
    for n in range(n_begin, n_end + 1, n_step):
        print("\nDATA SIZE: ", n)

        for gen_type in types:
            data = [generate_data(n) for i in range(repeats)]

            t_merge = test(mergeSort_impr, deepcopy(data))
            print("Merge_impr time for size", n, ":", t_merge)
            data_plot[gen_type]['merge_impr'][n] = t_merge

            t_hybrid = test(hybrid_sort, deepcopy(data))
            print("Hybrid time for size", n, ":", t_hybrid)
            data_plot[gen_type]['hybrid'][n] = t_hybrid

            print("Ratio merge_impr/hybrid:", t_merge / t_hybrid)
            data_plot_2['ratio']['merge_impr/hybrid'][n] = t_merge / t_hybrid

    # plot the running-time graphs of the algorithms
    plot_data(data_plot, logarithmic=False, oneplot=True, data_2=data_plot_2)
Code example #12
def run_prog():
    t0 = time.perf_counter()
    LED_Grid = np.mgrid[0:LightW:dim_x * 1j, 0:LightL:dim_y * 1j].reshape(
        2, -1).T  # e.g. 16 x 16 LED grids
    t1 = time.perf_counter()
    data = trace_rays.trace_rays(LED_Grid, L, height)
    data[:, 2], data[:, 3] = data[:, 2] * 180 / np.pi, data[:, 3] * 180 / np.pi

    t2 = time.perf_counter()
    #print("calcuating light wafer intensity (for diffuser)")
    #print("")
    wafer_data = light_ray_intensity.intensity(data)

    t3 = time.perf_counter()
    #myconfig.save_data(data)

    print_uniformity.print_uniformity(data)

    t4 = time.perf_counter()
    print("plotting graphs")
    print("")
    plot_data.plot_data(data)
    plot_data.plot_data(wafer_data)

    t5 = time.perf_counter()

    print("setup:" + str((t1 - t0) * 1000) + "ms")
    print("calc rays :" + str((t2 - t1) * 1000) + "ms")
    print("calc intensity:" + str((t3 - t2) * 1000) + "ms")
    print("calc statistics:" + str((t4 - t3) * 1000) + "ms")
    print("plot:" + str((t5 - t4) * 1000) + "ms")
    norm.append((t2 - t1) * 1000)
    numpy.append((t3 - t2) * 1000)
Code example #13
def visualize_boundary_linear(X, y, model):
    w = model.coef_.reshape(-1)
    b = model.intercept_
    xp = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100)
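    # the boundary satisfies w[0]*x1 + w[1]*x2 + b = 0, so x2 = -(w[0]*x1 + b) / w[1]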
    yp = -(w[0] * xp + b) / w[1]
    print(yp)
    plot_data(X, y)
    plt.plot(xp, yp)
Code example #14
def count_steps(data):
    print "Accelerometer data graph"
    plot_data(data)
    num_steps = 0
    '''
    ADD YOUR CODE HERE. This function counts the number of steps in data and returns the number of steps
    '''
    return num_steps
Code example #15
File: circuits.py  Project: bshlgrs/circuit-solver
def evolve(circuit, startState, timelimit, timestep, printtimes, plotting=True):

    time = 0
    nextprinttime = 0

    if startState is None:
        inductorMode = False
    else:
        currentsList = startState
        inductorMode = True

    if plotting:
        plotlist = []

    while time < timelimit:

        stuff = allEquations(circuitToDetailsList(circuit))

        if inductorMode:
            currentsList = circuitSolve.solveCurrents(
                stuff, currentsList, timestep)
        else:
            currentsList = circuitSolve.findEquilibriumCurrents(stuff)

        if time > nextprinttime:
            if not plotting:
                print("\t".join(str(x) for x in
                                format_data(circuit, currentsList, time)))
            else:
                plotlist.append(format_data(circuit, currentsList, time))

            nextprinttime += printtimes

        for (pos, connection) in enumerate(circuit):
            for item in connection[2]:
                item.updateSelf(timestep, currentsList[pos])

        time += timestep

    if not plotting:
        print("\t".join(str(x) for x in
                        format_data(circuit, currentsList, time)))
    else:
        plot_data.plot_data(plotlist)
Code example #16
def visualize_boundary_linear(X, y, clf):

    plot_data(X, y)

    coef = clf.coef_.ravel()
    intercept = clf.intercept_.ravel()

    xp = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100)
    yp = -1.0 * (coef[0] * xp + intercept[0]) / coef[1]

    plt.plot(xp, yp, linestyle='-', color='b')
Code example #17
def visualize_boundary(X, y, model):
    plot_data(X, y)
    x1plot = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100).T
    x2plot = np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100).T
    X1, X2 = np.meshgrid(x1plot, x2plot)
    vals = np.zeros(X1.shape)

    for i in range(np.size(X1, 1)):
        this_X = np.array([X1[:, i], X2[:, i]])
        vals[:, i] = model.predict(this_X.T)
    plt.contour(X1, X2, vals, levels=[0.5])
Code example #18
File: baro.py  Project: chanhee-lee/WristWashAnalysis
def count_steps(data):
    print('count_steps')
    # different algorithm: inspect the plots first
    num_steps = 0
    plot_data(data)
    plot_mag(vector_magnitude(data))
    plot_mag(moving_average(data, 230))

    '''
    This function counts the number of steps in data and returns the number of steps
    '''
    return num_steps
Code example #19
def main():
    # Get data
    file_name = "walking_steps_1.csv"  # Change to your file name
    data = parser_data.get_data(file_name)  #data -- time,X,Y,Z
    clean_data(data, 350, 1113, 'walking_steps_1_cleaned.csv')
    data = parser_data.get_data("walking_steps_1_cleaned.csv")
    plot_data(data)
    file_name = "walking_steps_2.csv"  # Change to your file name
    data = parser_data.get_data(file_name)  #data -- time,X,Y,Z
    clean_data(data, 200, 1099, 'walking_steps_2_cleaned.csv')
    data = parser_data.get_data("walking_steps_2_cleaned.csv")
    plot_data(data)
Code example #20
def count_steps(data):
    print "Accelerometer data graph"
    plot_data(data)
    mag = vector_magnitude(data)
    plot_mag(mag)
    average = moving_average(data, 10)
    plot_mag(average)
    num_steps = 0
    '''
    ADD YOUR CODE HERE. This function counts the number of steps in data and returns the number of steps
    '''

    return num_steps
Code example #21
def run_task4():
    """
    Checks the double sorting of two arrays for task 4.
    The procedure runs a series of experiments with array sizes n from
    n_begin to n_end with step n_step, performing `repeats` runs for each n.
    After the experiments it plots the number of comparison operations
    performed by the sorting procedure, together with k*nlogn curves for
    k = {1, 2, 3, 4}; the latter allow estimating the running time of the
    developed sorting algorithm.
    """
    global counter
    # parameters of the experiment
    repeats = 10  # number of runs per problem size
    n_begin = 10  # initial problem size
    n_end = 1000  # final problem size
    n_step = 10  # size step

    data_plot = {
        'random': {
            'quick': {},
            'nlogn': {},
            '2nlogn': {},
            '3nlogn': {},
            '4nlogn': {}
        }
    }
    for n in range(n_begin, n_end + 1, n_step):
        counter = 0
        for i in range(repeats):
            a, b = generate_double_data(n)
            double_quick_sort(a, b, 0, len(a))
            if not check_double_result(a, b):
                return
        counter = counter / repeats
        data_plot['random']['quick'][n] = counter
        data_plot['random']['nlogn'][n] = n * np.log2(n)
        data_plot['random']['2nlogn'][n] = 2 * n * np.log2(n)
        data_plot['random']['3nlogn'][n] = 3 * n * np.log2(n)
        data_plot['random']['4nlogn'][n] = 4 * n * np.log2(n)
        print(n, ":", counter)

    plot_data(data_plot,
              logarithmic=False,
              oneplot=True,
              label_sort_type=False,
              label_data2_label=False,
              data_label='Number of operations',
              legend_pos=2,
              show_markers=False)
Code example #22
File: task4.py  Project: rtierney123/SensoryActivity
def segment_climbing_walking(data):
    '''
    While collecting data on stairs there were times when you were also walking rather than climbing.
    It is important to remove the parts of the data where you were walking in between the flights of stairs.
    Write your own algorithm to find the segments in data which correspond to climbing only.

    This function returns:
    a list of (x, y, z) tuples which correspond to climbing only,
    i.e. the data points that correspond to walking are removed.
    '''

    print('segment_climbing_walking')
    plot_data(data)

    return data
Code example #23
File: task3.py  Project: rtierney123/SensoryActivity
def count_steps(data):
    print("Accelerometer data graph")
    plot_data(data)
    data = np.array(data)
    datapoints = (data[:, 1:])
    magnitudes = vector_magnitude(datapoints)
    plt.plot(magnitudes)
    filtered = moving_average(magnitudes, 10)
    plt.show()

    plt.plot(filtered)
    plt.show()
    # each step shows up as roughly two local maxima in the smoothed
    # magnitude signal
    return int(len(get_local_maxima(filtered)) / 2)
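
get_local_maxima is not shown; a minimal sketch consistent with the call above (hypothetical; the assignment's helper may use a stricter peak test):

def get_local_maxima(values):
    # indices whose value exceeds the left neighbour and is at least
    # the right neighbour
    return [i for i in range(1, len(values) - 1)
            if values[i - 1] < values[i] >= values[i + 1]]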
Code example #24
def plot(ax):
    print('Plotting data ...\n')

    par_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    ex1_path = os.path.abspath(os.path.join(par_path, 'ex1'))

    data = pd.read_csv(os.path.join(ex1_path, 'ex1data1.txt'),
                       names=['x1', 'y'],
                       header=None)
    plot_data.plot_data(data['x1'], data['y'], ax)

    # make the matrix X and the result vector y
    y = data['y']
    x = pd.DataFrame(np.ones(len(y)), columns=['x0'])
    x['x1'] = data['x1']

    return (x, y)
Code example #25
def main(args):
    model = args.model.model
    model.load_state_dict(torch.load(args.weight))
    model.eval()
    if torch.cuda.is_available():
        model.cuda()

    dyn_model = model.dyn
    vae_model = model.vae

    # Load sequence:
    seq, param = args.data
    original_seq = seq[args.select,:,:]
    trajectory = [original_seq[args.start_step:(args.start_step+1),:]]
    renders = []

    get_image = lambda r: torch.squeeze(vae_model.decode(r).detach().cpu(), dim=0)

    # Start with the first index:
    X = to_variable(torch.tensor(trajectory[0]), cuda=torch.cuda.is_available())
    X.requires_grad_()
    if args.act == "render":
        renders.append(get_image(X))

    for i in range(args.steps):
        X = to_variable((X + dyn_model(X)).data, cuda=torch.cuda.is_available())
        X.requires_grad_()

        trajectory.append(X.detach().cpu().numpy())
        if args.act == "render":
            renders.append(get_image(X))

    trajectory = np.squeeze(np.stack(trajectory), axis=1)

    if args.act == "plot":
        plot_data(args, trajectory, original_seq=original_seq)
    elif args.act == "render":
        if args.save:
            save_image(renders, args.save)
        if args.save_frames:
            for i, im in enumerate(renders):
                save_image(im, args.save_frames.format(i))
Code example #26
def compare_ins_and_merge():
    """
    Compares two sorting methods: insertion sort and merge sort.
    The comparison is based on measuring their running time (in seconds),
    using the test function.
    Testing is performed on problems of different sizes: from n_begin to
    n_end with step n_step (these parameters are set inside the procedure).
    For each size, `repeats` problem instances are generated, and both
    algorithms are run on the same instances.
    """

    # parameters of the experiment
    repeats = 1000  # number of runs per problem size
    n_begin = 1  # initial problem size
    n_end = 200  # final problem size
    n_step = 1  # size step

    types = ["random"]
    data_plot = {'random': {'insertion': {}, 'merge': {}}}
    data_plot_2 = {'ratio': {'insertion/merge': {}}}
    for n in range(n_begin, n_end + 1, n_step):
        print("\nDATA SIZE: ", n)

        for gen_type in types:
            # generate `repeats` test data sets of size n
            data = [generate_data(n) for i in range(repeats)]

            t_insertion = test(insertion_sort, deepcopy(data))
            print("Insertion time for size", n, ":", t_insertion)
            data_plot[gen_type]['insertion'][n] = t_insertion

            t_merge = test(merge_sort, deepcopy(data))
            print("Merge time for size", n, ":", t_merge)
            data_plot[gen_type]['merge'][n] = t_merge

            print("Ratio insertion/merge:", t_insertion / t_merge)
            data_plot_2['ratio']['insertion/merge'][n] = t_insertion / t_merge

    # plot the running-time graphs of the algorithms
    plot_data(data_plot, logarithmic=False, oneplot=True, data_2=data_plot_2)
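
The test() timing helper these comparisons rely on is not shown. A minimal sketch consistent with this call site (note that code example #41 unpacks a second return value, an operation counter, so its variant must differ):

import time

def test(sort_fn, datasets):
    # total wall-clock time to sort every dataset in the batch
    start = time.perf_counter()
    for dataset in datasets:
        sort_fn(dataset)
    return time.perf_counter() - start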
Code example #27
def main():

    data_x, data_y = data_prep('iris.csv')
    plot_data(data_x)
    #Getting input from user
    while True:
        _input = []
        try:
            for i in range(4):
                feature = float(input("Enter feature number {}: \n".format(i + 1)))
                _input.append(feature)

            k = int(input("Enter number of Neighbors:   "))

            category = KNN_algorithm(data_x=data_x, data_y=data_y,
                                     _input=_input, k=k)

            print(category)
        except Exception as e:
            print(e)
            break
Code example #28
def visualize_boundary(X, y, clf):
    """
    Plots a linear decision boundary learned by the SVM.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Samples, where n_samples is the number of samples and n_features is the number of features.
    y : ndarray, shape (n_samples,)
        Labels.
    clf : sklearn.svm.classes.SVC
        The trained SVM.
    """
    plot_data(X, y)
    x1_plot = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100)
    x2_plot = np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)
    X1, X2 = np.meshgrid(x1_plot, x2_plot)
    vals = np.zeros(X1.shape)

    for i in range(X1.shape[1]):
        X_tmp = np.hstack((X1[:, i:i + 1], X2[:, i:i + 1]))
        vals[:, i] = clf.predict(X_tmp)
    plt.contour(X1, X2, vals, levels=[0])
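
A possible way to exercise visualize_boundary, with illustrative parameters only (X and y are assumed to be a 2-feature dataset and binary labels; they are not defined in this listing):

import matplotlib.pyplot as plt
from sklearn.svm import SVC

clf = SVC(kernel='rbf', C=1.0, gamma=30.0)
clf.fit(X, y)  # X: (n_samples, 2) features, y: binary labels (assumed given)
visualize_boundary(X, y, clf)
plt.show()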
Code example #29
def demo(Index, Nz, Reg_L1, Reg_L2, Bounds, Methods, Plot, Residuals,
         Heatplot):
    """Gets data from interface() and display processed data

    Index     – int value contains an index of transient in dataset
    Nz        – int value which is lenght of calculated vector
    Reg_L1, Reg_L2 – reg. parameters for L1 and L2 regularisation
    Bounds    – list with left and right bound of t-domain
    Methods   – list with methods to process data
    Plot      – boolean which calls plot_data() if true
    Residuals – (not working yet)
    Hetplot   – plots heatplot for all dataset

    """
    import numpy as np
    from read_file import read_file
    from laplace import laplace
    from plot_data import plot_data
    from hp import hp

    Bounds = 10.0**np.asarray(Bounds)

    s, C, T = read_file('data/EUNB29b_1-16-2/EUNB29b_1-16-2_150_8.DLTS')
    cut = len(T)
    cus = len(C[0])

    data = laplace(s, C[Index] - C[Index][-1], Nz, Reg_L1, Reg_L2, Bounds,
                   Methods)
    if Plot:
        plot_data(s, C[Index] - C[Index][-1], data, T, Index)
    if Residuals:
        print('Plotting L-curve...')
        print(Residuals)
    if Heatplot:
        print('Plotting Heatplot...')
        hp(s, C, T, Methods, Index, Reg_L1, Reg_L2, Bounds, Nz)
Code example #30
def lda(p1_file, p2_file, file_delimiter=',', display_opt_v=True):
    # Reading the population matrices
    population_1, population_2 = read_data(p1_file, p2_file, file_delimiter)

    # Computing the scatter matrices
    p1_scatter = scatter_matrix(population_1)
    p2_scatter = scatter_matrix(population_2)

    # Computing the mean vectors
    p1_attributes, p1_samples = population_1.shape
    p2_attributes, p2_samples = population_2.shape

    p1_mean = np.reshape(population_1.mean(1), (p1_attributes, 1))
    p2_mean = np.reshape(population_2.mean(1), (p2_attributes, 1))

    # Computing the optimization vector
    opt_v = get_opt_vector(p1_scatter, p2_scatter, p1_mean, p2_mean)

    # Computing the projection matrix
    proj_matrix = np.matmul(opt_v, opt_v.T)

    # Plotting initial data
    plt.suptitle('LDA')
    plt.subplot(1, 2, 1)
    plot_data(population_1, population_2, opt_v, 'Initial data', display_opt_v)

    # Computing the projected data matrix
    proj_p1_matrix = np.matmul(proj_matrix, population_1)
    proj_p2_matrix = np.matmul(proj_matrix, population_2)

    # Plotting the projected data
    plt.subplot(1, 2, 2)
    plot_data(proj_p1_matrix, proj_p2_matrix, opt_v, 'Projected data',
              display_opt_v)

    plt.show()
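
get_opt_vector is not shown; for Fisher's LDA the optimal direction is proportional to (S1 + S2)^-1 (mu1 - mu2). A hedged sketch matching the call site above:

import numpy as np

def get_opt_vector(p1_scatter, p2_scatter, p1_mean, p2_mean):
    # within-class scatter; its inverse maps the mean difference onto
    # the Fisher discriminant direction
    s_w = p1_scatter + p2_scatter
    opt_v = np.linalg.solve(s_w, p1_mean - p2_mean)
    return opt_v / np.linalg.norm(opt_v)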
Code example #31
import numpy as np
from matplotlib import pyplot as plt
from plot_data import plot_data
from compute_cost import compute_cost
from normal_equation import normal_equation
from gradient_descent import gradient_descent
from feature_normalize import feature_normalize

data = np.loadtxt('ex1data1.txt', delimiter=',')

X = data[:, 0].reshape(-1, 1)
y = data[:, 1].reshape(-1, 1)

m = len(y)

plot_data(X, y, 'x')

X = np.c_[np.ones((m, 1)), data[:, 0]]
theta = np.zeros((2, 1))

iterations = 1500
alpha = 0.01

print('\nTesting the cost function ...\n')

J = compute_cost(X, y, theta)

print('With theta = [0 ; 0]\nCost computed = %f\n' % J)
print('Expected cost value (approx) 32.07\n')

J = compute_cost(X, y, np.array([[-1.0], [2.0]]))
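
For reference, the cost being tested is J(theta) = 1/(2m) * sum((X theta - y)^2); a minimal sketch of compute_cost, assuming the exercise's own file matches the standard formula:

import numpy as np

def compute_cost(X, y, theta):
    m = len(y)
    residual = X.dot(theta) - y
    return float(residual.T.dot(residual)) / (2 * m)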
Code example #32
File: hisparc_data.py  Project: HiSPARC/correlation
print('')
plot_question = query_yes_no('Do you want to see a PLOT of your data (variable against timestamp)?')

if plot_question and not download_question:
    use_downloaded_files = False
    print('')
    print('If you want to analyze data you must already have a data set on your pc.')
    print('Make sure it is located in: ' + os.getcwd())

elif plot_question and download_question:
    use_downloaded_files = query_yes_no('Do you want to use the data you downloaded earlier?')

if plot_question and use_downloaded_files:
    plot_variable1 = choose_one_variable(kind_of_data_in_table, stations)  # e.g. plot_variable = [('event_rate','data_s501_2011,12,7_2011,12,8.h5','501','events','')]
    values1, times, returntype = plot_data(plot_variable1)

if plot_question and not use_downloaded_files:
    list_files = []
    stations = []

    print('')
    station_ID = question.digit("Enter the station ID that you want to use in your analysis ( e.g. 501 ) ")
    stations.append(station_ID)
    print('')
    number_of_files = question.digit("Enter the NUMBER of FILENAMES for station %s that you want to use in your analysis ( e.g. 6 ): " % station_ID)
    print('')
    print("You are going to enter filenames ( e.g. data_s501_2011,7,21_2011,7,22.h5 )")
    print('Enter the filenames in CHRONOLOGICAL ORDER. ')
    print('')
    for j in range(1, int(number_of_files) + 1):
Code example #33
        and all the rois
    '''

    b0_orders = get_b0_orders(int(n_b0s))

    for b0_order in b0_orders:
    
        print('Combination: {} {} {} {} {} {}'.format(incl_excl, n_b0s, b0_order, sep_av, transform, roi_name))
    
        results_file, results_dir = wrangle_text_files(data_dir, incl_excl, n_b0s,
                                            b0_order, sep_av, transform,
                                            roi_name, subs, locs, scans)
        
        data = read_in_data(results_file)
        
        plot_data(data, results_dir, roi_name, colors, shapes)

# Now answer specific questions that you care about

Q_ec_vol_n6(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, colors, shapes)

# How does everything change with the different number of B0s?

Q_n_b0s(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list, colors, shapes)


'''
# Find all the results files in all the b0_order folders
for incl_excl, n_b0s, sep_av, transform, roi_name in it.product(incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):

    data_allorders, results_allorders_dir = collapse_data(data_dir, incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list)
Code example #34
File: Q_ec_vol_n6.py  Project: KirstieJane/NSPN_CODE
def Q_ec_vol_n6(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, colors, shapes):
    """
    Q_ec_vol_n6 asks the question:
        "How does the volume that you register to affect the measurement
        when you use all the data"
        
    It reads in all the necessary files from a series of results_files and collapses
    across all of them so they can be plotted together
    
    Inputs:     data_dir
                incl_excl_list
                sep_av_list
                transform_list
                roi_list
        
    Output:     data array
    
    """
    
    #==========================================================================
    import os
    import numpy as np
    import numpy.lib.recfunctions as rfn
    from glob import glob
    import itertools as it
    #---------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data
    #==========================================================================
    
    print('  Question: How does the choice of eddy correct volume affect the measurements?')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None
        
        b0_orders = get_b0_orders(np.int(6))

        for b0_order in b0_orders:
            glob_string = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_6',
                                    'B0_ORDER_{}'.format(b0_order), sep_av, transform, '{}_FA_MD_vol.txt'.format(roi_name))

            files = glob(glob_string)

            dict = {'b0_order': b0_order}
            
            # Read in every file and combine them
            for file in files:
                data = read_in_data(file)
                data_allorders = combine_data(data_allorders, data, dict)
            
        # Name the results dir that this is going into:
        results_allorders_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_6',
                                'ALL_ORDERS', sep_av, transform)
        
        # Now plot the data
        plot_data(data_allorders, results_allorders_dir, roi_name, colors, shapes)
            
Code example #35
print('Running warm_up_exercise()...')
print('5x5 Identity Matrix: ')

print(warm_up_exercise())

# ======================= Part 2: Plotting =======================
print "Plotting Data..."

data = np.loadtxt(open("ex1data1.txt", "r"), delimiter=",")
X = data[:, 0]
y = data[:, 1]
m = len(y)  # Number of training examples

# Plot data
plt.figure()
plot_data(X, y)
plt.show()

# =================== Part 3: Gradient descent ===================
print('Running Gradient Descent...')
# Add a column of ones to x
X = np.hstack((np.ones((m, 1)), X.reshape(m, 1)))

# Initialize fitting parameters
theta = np.zeros(2)

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# Compute and display initial cost
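
The update the script is about to run is the standard batch gradient-descent step theta := theta - (alpha/m) * X^T (X theta - y). A minimal sketch of gradient_descent under that assumption (the exercise's own version may also record a cost history):

import numpy as np

def gradient_descent(X, y, theta, alpha, iterations):
    m = len(y)
    for _ in range(iterations):
        theta = theta - (alpha / m) * X.T.dot(X.dot(theta) - y)
    return theta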
Code example #36
File: Q_n_b0s.py  Project: KirstieJane/NSPN_CODE
def Q_n_b0s(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list, colors, shapes):
    """
    Q_n_b0s asks the question:
        "How does the number of B0s you include change your measurement?"
        
    It reads in all the necessary files from a series of results_files and collapses
    across all of them so they can be plotted together
    
    Inputs:     data_dir
                incl_excl_list
                sep_av_list
                transform_list
                roi_list
        
    Output:     data array
    
    """
    
    #==========================================================================
    import os
    import numpy as np
    import numpy.lib.recfunctions as rfn
    from glob import glob
    import itertools as it
    #---------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data
    #==========================================================================
    
    print('  Question: How does the number of B0s change your measurement')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name, ec_b0 in it.product(incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list):

        # Start off with an empty data array
        data_allorders_allb0s = None
        
        for n_b0s in range(1,7):
            
            b0_orders = get_b0_orders(int(n_b0s))

            b0_orders = [ order for order in b0_orders if order[:2] == ec_b0 ]
            
            for b0_order in b0_orders:
            
                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_{}'.format(n_b0s),
                                        'B0_ORDER_{}'.format(b0_order), sep_av, transform, '{}_FA_MD_vol.txt'.format(roi_name))

                files = glob(glob_string)

                dict = { 'b0_order': b0_order, 'n_b0s' : n_b0s }
                
                # Read in every file and combine them
                for file in files:
                    data = read_in_data(file)
                    data_allorders_allb0s = combine_data(data_allorders_allb0s, data, dict)
                
        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'ALL_B0S',
                                'B0_{}'.format(ec_b0), sep_av, transform)
        
        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir, roi_name, colors, shapes)
        
    # Now do the same thing, but with REALLY all the B0s
    
    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders_allb0s = None
        
        for n_b0s in range(1,7):
            
            b0_orders = get_b0_orders(int(n_b0s))
            
            for b0_order in b0_orders:
            
                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl, 'B0S_{}'.format(n_b0s),
                                        'B0_ORDER_{}'.format(b0_order), sep_av, transform, '{}_FA_MD_vol.txt'.format(roi_name))

                files = glob(glob_string)

                dict = { 'b0_order': b0_order, 'n_b0s' : n_b0s }
                
                # Read in every file and combine them
                for file in files:
                    data = read_in_data(file)
                    data_allorders_allb0s = combine_data(data_allorders_allb0s, data, dict)
                
        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS', incl_excl, 'ALL_B0S',
                                'ALL_B0S', sep_av, transform)
        
        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir, roi_name, colors, shapes)
        
        
Code example #37
def load_data(plot=False):
    datas = np.loadtxt('ex1data1.txt', delimiter=',')  # NumPy's own method for loading txt files
    if plot:
        plot_data(datas[:, 0], datas[:, 1])
    return datas
Code example #38
def emg_process(calib_file_name, subject_name, file_path = None, doplot = True):

    #close previously opened plots.
    plt.close('all')

    #Each subject's directory of files
    ACC_CALIBRATION = os.path.realpath(file_path + "ACC_CALIBRATION/")
    SIGNALS = os.path.realpath(file_path + "SIGNALS/")
    EMG_MVC = os.path.realpath(file_path + "EMG_MVC/")


    """
    channels 1,2,3 (ACCknee)
    channel  4 	   (EMG_femoris)
    channel  5	   (EMG_hamstring)
    """

    all_files = os.listdir(file_path)

    print ("DOPLOT = ", doplot)


    #Open pdf and saves images in it
    #Open report.txt file and saves necessary data in it
    if doplot == True:
        pp, REPORTfile = pdf_report_creator(file_path)

    # ------------------------------------------------------------------------------------------------
    #                                 Knee Calibration
    # ------------------------------------------------------------------------------------------------

    print("\nKnee Calibration")

    # performs calibration of the Accelerometer based on the '.txt' file in ACC_Calibration path
    # if the calibration signal is not correct, select a standard file for calibration: xyzcal.txt
    X_Cal_K, Y_Cal_K, Z_Cal_K = calibration(calib_file_name, ACC_CALIBRATION, standard_calibration=True)

    # values of the knee calibration
    Vmin_X_K = X_Cal_K[1]
    Vmax_X_K = X_Cal_K[0]
    Vmin_Y_K = Y_Cal_K[1]
    Vmax_Y_K = Y_Cal_K[0]
    Vmin_Z_K = Z_Cal_K[1]
    Vmax_Z_K = Z_Cal_K[0]


    #------------------------------------------------------------------------------------------------
    #                                 Calculate Maximum Voluntary Contraction
    #------------------------------------------------------------------------------------------------

    #Calculate MVC
    MVC_femoris, emg_fsmooth = calculate_MVC_femoris(EMG_MVC)
    MVC_hamstring, emg_hsmooth = calculate_MVC_hamstring(EMG_MVC)

    #------------------------------------------------------------------------------------------------
    #                                   SIGNALS
    #------------------------------------------------------------------------------------------------

    #load and read all '.txt' files in SIGNALS folder
    #performs a frequency analysis of the EMG signal
    #find the maximum acceleration values
    #plot tests
    #save plots in PDF
    #create a report.txt file with relevant results

    print ("\nOpened SIGNALS folder. Processing Signals from: ", subject_name)
    for file_name in os.listdir(SIGNALS):
        if (("dj" not in file_name) and "DJ" not in file_name) and file_name[0] != '.':
            print ("\nFILE NAME = ", file_name)

            file = os.path.realpath(SIGNALS + "/" + file_name)

            print ("\nLoading channels...")

            #channels 4, 5 and 6 are used for the knee acceleration
            #and channels 7 and 8/9 for emg_femoris_and_hamstring
            ACCKneeX    =   load_channel(file,4)
            ACCKneeY    =   load_channel(file,5)
            ACCKneeZ    =   load_channel(file,6)
            emg_femoris, emg_femoris_raw = load_channel_emg(file, 7)

            f = open(file, 'r')
            f_head = [f.readline() for i in range(9)]
            first_line = f_head[8].split()
            if (first_line[7] == '0'):
                emg_hamstring, emg_hamstring_raw = load_channel_emg(file, 9)
            else:
                emg_hamstring, emg_hamstring_raw = load_channel_emg(file, 8)

            print ("done")

            #------------- KNEE --------------#

            #Acceleration
            ACCKneeX_G = convertV2G(ACCKneeX, Vmin_X_K, Vmax_X_K)
            ACCKneeY_G = convertV2G(ACCKneeY, Vmin_Y_K, Vmax_Y_K)
            ACCKneeZ_G = convertV2G(ACCKneeZ, Vmin_Z_K, Vmax_Z_K)

            # start running (differentiation of the ACCX signal)
            events = np.argwhere(smooth(abs(100 * np.diff(ACCKneeX)), window_len=200) > 500)
            start = events[0]
            # running period
            run_period = 2500

            # threshold of the signal window
            threshold_window = start + run_period + 100
            # find peak
            if (threshold_window < (len(ACCKneeX))):

                # calculate parameters
                max_index, preact_index, valley_ind_val, sm_rms = ACCel_indexes(ACCKneeX, ACCKneeY, ACCKneeZ, start, run_period)
                #run_period = 2500

                #find maximums
                ACC_max = abs_max(max_index, ACCKneeX_G, ACCKneeY_G, ACCKneeZ_G)
                Preact_max = abs_preact(preact_index, ACCKneeX_G, ACCKneeY_G, ACCKneeZ_G)

                print("\nFrequency Analysis...")
                n_samples = len(ACCKneeX)
                #defining a window where the frequency analysis will be done
                min_knee, max_knee = get_limits(ACC_max, n_samples, 500)

                #Performing an FFT to the EMG_femoris and hamstring signals with a maximum frequency of 100 Hz
                freqs_femoris_knee, mags_femoris_knee = sfft(emg_femoris_raw[min_knee:max_knee]/float(MVC_femoris), 100)
                freqs_hamstring_knee, mags_hamstring_knee = sfft(emg_hamstring_raw[min_knee:max_knee]/float(MVC_hamstring), 100)

                #Calculating the EMG frequency in the maximum acceleration
                maximum_freq_femoris_knee_position = find(mags_femoris_knee == max(mags_femoris_knee))
                maximum_freq_femoris_knee = float(freqs_femoris_knee[maximum_freq_femoris_knee_position])

                maximum_freq_hamstring_knee_position = find(mags_hamstring_knee == max(mags_hamstring_knee))
                maximum_freq_hamstring_knee = float(freqs_hamstring_knee[maximum_freq_hamstring_knee_position])

                #-------------------------------------------
                #                  Plot Test
                #-------------------------------------------

                print("\nPrinting results...")

                emg_femoris_MVC = 100.0*(emg_femoris/float(MVC_femoris))
                emg_hamstring_MVC = 100.0*(emg_hamstring/float(MVC_hamstring))

                fig = plot_data(valley_ind_val, sm_rms, file_name, max_index, preact_index, ACCKneeX_G, ACCKneeY_G, ACCKneeZ_G, emg_femoris_MVC, emg_hamstring_MVC, freqs_femoris_knee, mags_femoris_knee, freqs_hamstring_knee, mags_hamstring_knee)

                pp.savefig()
                print("\nCalculating maximum acceleration...")


        newLine = '\n----------------------------------------------------------------------\n'


        #In report file integrate:
        #Max ACCX,Y and Z after 3 s
        #Preactivation: smooth signal and find peak with pre-peak
        #MVC with max acceleration
        #max acceleration in landing
        #max emg in max landing acceleration

        print("\nCreating Report file...")
        #Save Report
        REPORTfile.write(newLine)
        REPORTfile.write(str(file_name) + "\n")
        REPORTfile.write("Maximum knee total acceleration (in CD): "+ str(ACC_max) + " g\n")
        REPORTfile.write("Maximum ACCX: " + str(max(np.mean(ACCKneeX_G[max_index-10:max_index+10]))) + ' g\n')
        REPORTfile.write("Maximum ACCY: " + str(max(ACCKneeY_G[max_index-10:max_index+10])) + ' g\n')
        REPORTfile.write("Maximum ACCZ: " + str(max(ACCKneeZ_G[max_index-10:max_index+10])) + ' g\n')
        REPORTfile.write("%MVC [Rectus Femoris]: " + str(emg_femoris_MVC[max_index-10:max_index+10])+' %\n')
        REPORTfile.write("%MVC [Hamstring]: " + str(emg_hamstring_MVC[max_index-10:max_index+10])+' %\n')
        REPORTfile.write("RMS [Rectus Femoris]: " + str(RMS_femoris_knee)+'\n')
        REPORTfile.write("RMS [Hamstring]: " + str(RMS_hamstring_knee)+'\n')
        REPORTfile.write("Frequency [Rectus Femoris]: " + str(maximum_freq_femoris_knee)+'\n')
        REPORTfile.write("Frequency [Hamstring]: " + str(maximum_freq_hamstring_knee)+'\n')

    #Close report and graphics PDF
    REPORTfile.close()
    pp.close()

    print ("Closing...")
    #plt.show()
    return
Code example #39
rows_sum = np.sum(gamma, axis=1)
gamma /= rows_sum[:, None]

# to facilitate visualization, we label each data point by the cluster
# which takes most responsibility for it.
labels = np.argmax(gamma, 1)
m = gamma[labels]

# this draws a plot of the initial labeling.
# plot_data(data, labels)

# given the initial labeling we set mu, sigma, and pi based on the m step
# and calculate the likelihood.
ll = -np.infty
[mu, sigma, pi] = m_step_gaussian_mixture(data, gamma)
nll = log_likelihood_gaussian_mixture(data, mu, sigma, pi)

print('log likelihood = %f' % (nll,))
# the loop iterates until convergence as determined by e.
while ll + e < nll:
    ll = nll
    gamma = e_step_gaussian_mixture(data, pi, mu, sigma)
    [mu, sigma, pi] = m_step_gaussian_mixture(data, gamma)
    nll = log_likelihood_gaussian_mixture(data, mu, sigma, pi)
    print('log likelihood = %f' % (nll,))
    
labels = np.argmax(gamma, 1)
m = gamma[labels]
plot_data(data, labels)
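
For reference, the E step being iterated computes responsibilities gamma[i, k] proportional to pi[k] * N(x_i | mu[k], sigma[k]). A hedged sketch of e_step_gaussian_mixture under assumed array shapes (data: (n, d); the snippet's own helpers are not shown):

import numpy as np
from scipy.stats import multivariate_normal

def e_step_gaussian_mixture(data, pi, mu, sigma):
    gamma = np.empty((data.shape[0], len(pi)))
    for k in range(len(pi)):
        gamma[:, k] = pi[k] * multivariate_normal.pdf(data, mu[k], sigma[k])
    # normalise each row so the responsibilities sum to one
    return gamma / gamma.sum(axis=1, keepdims=True)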

Code example #40
def find_best_traj(do_plots=False, out_index=0):
    """
        Find the best trajectories from "template" sample
    """
    ncpu = len([line for line in open('/proc/cpuinfo').read().split('\n')
                if line.startswith('processor')])
    print('ncpu', ncpu)

    pool = multiprocessing.Pool(ncpu)

    train_df = pd.read_csv('train_idx.csv.gz', compression='gzip')
    test_df = pd.read_csv('test_idx.csv.gz', compression='gzip')
    submit_df = pd.read_csv('sampleSubmission.csv.gz', compression='gzip')

    train_df = clean_data(train_df)
    test_df = clean_data(test_df)

    print('shape', train_df.shape, test_df.shape, submit_df.shape)
    print(test_df.dtypes)

    if do_plots:
        from plot_data import plot_data
        plot_data(train_df, prefix='train_html', do_scatter=False)
        plot_data(test_df, prefix='test_html', do_scatter=False)

    train_nib = pd.read_csv('train_nib.csv.gz', compression='gzip')
    test_nib = pd.read_csv('test_nib.csv.gz', compression='gzip')

    test_trj = pd.read_csv('test_trj.csv.gz', compression='gzip')

    np.random.seed(8675309)
    randperm = np.random.permutation(np.arange(train_df.shape[0]))
    dfs = [{'df': test_df, 'fn': 'test_final', 'test': True},
           {'df': train_df.iloc[randperm[:320], :],
            'fn': 'train_final', 'test': False},
           {'df': train_df.iloc[randperm[320:640], :],
            'fn': 'valid_final', 'test': False}]

    outlabels = ['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND',
                 'NMINMATCH', 'TAXI_ID', 'TIMESTAMP',
                 'ORIGIN_LAT', 'ORIGIN_LON',
                 'BEST_LAT', 'BEST_LON', 'BEST_TRIP_TIME',
                 'AVG_LAT', 'AVG_LON', 'AVG_TRIP_TIME',
                 'DEST_LAT', 'DEST_LON', 'TRIP_TIME']

    njobs = 5
    nevents = 960 // 3 // njobs
    fnindex = out_index//njobs
    evindex = out_index%njobs
    first_event = int(evindex*nevents)
    last_event = int((evindex+1)*nevents)

    dfs_dict = dfs[fnindex]
    df_ = dfs_dict['df']
    outfname = dfs_dict['fn']
    is_test = dfs_dict['test']

    outfile = gzip.open('%s_%02d.csv.gz' % (outfname, evindex), 'wt')
    csv_writer = csv.writer(outfile)
    csv_writer.writerow(outlabels)
    print(outfname, df_.shape, first_event, last_event)
    for idx, _row in enumerate(df_.iterrows()):
        _, row = _row
        if idx < first_event:
            continue
        if idx >= last_event:
            continue
        if idx % 10 == 0:
            print('test %d' % idx)
        tidx = row['TRAJECTORY_IDX']
        if is_test:
            tdf_ = test_trj
        else:
            tdf_ = pd.read_csv('train/train_trj_%02d.csv.gz' % (tidx%100),
                               compression='gzip')
        traj_ = get_trajectory(tidx, tr_df=tdf_)
        if is_test:
            if traj_.shape[0] > 15:
                traj_ = traj_[5:-5, :]
        if is_test:
            tedf_ = test_nib
        else:
            tedf_ = train_nib
        common_traj = {}
        skiplist_ = tuple(randperm[:640])
        match_list_, min_n_match = get_matching_list(tidx, te_df=tedf_,
                                                     tr_df=train_nib,
                                                     skiplist=skiplist_)
        print('match_list_', len(match_list_), min_n_match)
        match_list_parallel = [{} for i in range(100)]
        for tidx in match_list_:
            match_list_parallel[tidx%100][tidx] = match_list_[tidx]

        parallel_args = [(traj_, i, match_list_parallel[i], skiplist_)
                         for i in range(100)]
        for out_traj_ in pool.imap_unordered(find_common_trajectories,
                                             parallel_args):
            for k, v in out_traj_.items():
                common_traj[k] = v
        sort_list = sorted(common_traj.items(), key=lambda x: x[1])
        cond = train_df['TRAJECTORY_IDX'] == sort_list[-1][0]
        best_lat = float(train_df[cond]['DEST_LAT'])
        best_lon = float(train_df[cond]['DEST_LON'])
        best_time = float(train_df[cond]['TRIP_TIME'])
        top_lats = []
        top_lons = []
        top_time = []
        for key, _ in sort_list[-10:]:
            cond = train_df['TRAJECTORY_IDX'] == key
            top_lats.append(float(train_df[cond]['DEST_LAT']))
            top_lons.append(float(train_df[cond]['DEST_LON']))
            top_time.append(float(train_df[cond]['TRIP_TIME']))
        avg_lat = np.mean(top_lats)
        avg_lon = np.mean(top_lons)
        avg_time = np.mean(top_time)
        dist = haversine_distance(best_lat, best_lon, avg_lat, avg_lon)
        dtime = abs(best_time-avg_time)
        print('best-avg dist %s time %s' % (dist, dtime))
        row_dict = dict(row)
        row_dict['BEST_LAT'] = best_lat
        row_dict['BEST_LON'] = best_lon
        row_dict['BEST_TRIP_TIME'] = best_time
        row_dict['AVG_LAT'] = avg_lat
        row_dict['AVG_LON'] = avg_lon
        row_dict['AVG_TRIP_TIME'] = avg_time
        row_dict['NMINMATCH'] = min_n_match
        for k in row_dict:
            if k in ('ORIGIN_LAT', 'ORIGIN_LON', 'TOTAL_DISTANCE',
                     'BEST_LAT', 'BEST_LON', 'AVG_LAT', 'AVG_LON',
                     'DEST_LAT', 'DEST_LON', 'TRIP_ID'):
                continue
            row_dict[k] = int(row_dict[k])
        row_val = [row_dict[k] for k in outlabels]
        csv_writer.writerow(row_val)
        outfile.flush()
    return
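
haversine_distance above is presumably the standard great-circle formula; a sketch for reference (radius in km, inputs in degrees; the project's own helper is not shown):

import numpy as np

def haversine_distance(lat1, lon1, lat2, lon2):
    lat1, lon1, lat2, lon2 = map(np.radians, (lat1, lon1, lat2, lon2))
    a = (np.sin((lat2 - lat1) / 2) ** 2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2)
    return 2 * 6371.0 * np.arcsin(np.sqrt(a))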
Code example #41
def compare_merge_hybrid_quick():
    """
    Compares sorting methods: merge, hybrid, quick, and randomized quick
    sort. The comparison is based on measuring their running time (in
    seconds) and the number of element-comparison operations, using the
    test function.
    Testing is performed on problems of different sizes: from n_begin to
    n_end with step n_step (these parameters are set inside the procedure).
    For each size, `repeats` problem instances are generated, and all
    algorithms are run on the same instances.
    """

    # parameters of the experiment
    repeats = 10  # number of runs per problem size
    n_begin = 10  # initial problem size
    n_end = 1000  # final problem size
    n_step = 50  # size step

    types = ["random"]
    data_plot = {
        'random': {
            'merge': {},
            'hybrid': {},
            'quick': {},
            'random_quick': {}
        }
    }
    data_plot_2 = {
        'random': {
            'merge': {},
            'hybrid': {},
            'quick': {},
            'random_quick': {}
        }
    }

    for n in range(n_begin, n_end + 1, n_step):
        print "\nDATA SIZE: ", n

        for gen_type in types:
            data = [generate_data(n) for i in range(repeats)]

            t_merge, op_counter = test(merge_sort, deepcopy(data))
            print "Merge time:", t_merge, "op_count:", op_counter
            data_plot[gen_type]['merge'][n] = t_merge
            data_plot_2[gen_type]['merge'][n] = op_counter

            t_hybrid, op_counter = test(hybrid_sort, deepcopy(data))
            print "Hybrid time:", t_hybrid, "op_count:", op_counter
            data_plot[gen_type]['hybrid'][n] = t_hybrid
            data_plot_2[gen_type]['hybrid'][n] = op_counter

            t_quick, op_counter = test(quick_sort, deepcopy(data))
            print "Quick time:", t_quick, "op_count:", op_counter
            data_plot[gen_type]['quick'][n] = t_quick
            data_plot_2[gen_type]['quick'][n] = op_counter

            t_rquick, op_counter = test(randomized_quick_sort, deepcopy(data))
            print "Randomized quick time:", t_rquick, "op_count:", op_counter
            data_plot[gen_type]['random_quick'][n] = t_rquick
            data_plot_2[gen_type]['random_quick'][n] = op_counter

    # plot the running-time graphs of the algorithms
    plot_data(data_plot,
              logarithmic=False,
              oneplot=True,
              data_2=data_plot_2,
              label_sort_type=False,
              label_data2_label=False,
              data_label='Time in sec',
              data2_label='Number of operations',
              legend_pos=2,
              legend2_pos=2,
              show_markers=False)
Code example #42
def plot_btn_clk():
    plot_kwargs = self.build_plot_kwargs()
    plot_data.plot_data(plot_kwargs)
Code example #43
File: test_ploy.py  Project: kore-geosystems/ipynb
import pandas as pd
import numpy as np
import plot_data as plt

file_name = "kma-1_drillers_dashboard_data.csv"

df = pd.read_csv(file_name, parse_dates=["time"], index_col="time")
df2 = df.reindex(pd.date_range(start=min(df.index), end=max(df.index), freq="s"))
df2["delta_wob"] = pd.concat(np.concatenate((np.array([0]), np.diff(df2["wob"]))))

time_start1 = "2015-11-06 01:00:00"
time_end1 = "2015-11-06 04:00:00"
df1 = plt.plot_data(df, time_start1, time_end1)
df1.describe()
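
Judging from the call site, this project's plot_data(df, time_start, time_end) plots a time slice of the frame and returns it (so that .describe() works on the result). A hedged sketch of such a module (the actual kore-geosystems implementation is not shown):

from matplotlib import pyplot

def plot_data(df, time_start, time_end):
    # slice the frame to the requested time window, draw it, and return it
    window = df.loc[time_start:time_end]
    window.plot(subplots=True)
    pyplot.show()
    return window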