Example #1
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap


def draw(name, p, clf, X, y, step):
    stepx = step
    stepy = step
    x_min, y_min = np.amin(X, 0)
    x_max, y_max = np.amax(X, 0)
    x_min -= stepx
    x_max += stepx
    y_min -= stepy
    y_max += stepy
    xx, yy = np.meshgrid(np.arange(x_min, x_max, stepx),
                         np.arange(y_min, y_max, stepy))

    mesh_dots = np.c_[xx.ravel(), yy.ravel()]
    zz = np.apply_along_axis(lambda t: clf.predict(t), 1, mesh_dots)
    zz = np.array(zz).reshape(xx.shape)

    plt.figure(figsize=(10, 10))
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    x0, y0 = X[y == -1].T
    x1, y1 = X[y == 1].T

    plt.pcolormesh(xx, yy, zz, cmap=ListedColormap(['#FFAAAA', '#AAAAFF']))
    plt.scatter(x0, y0, color='red', s=100)
    plt.scatter(x1, y1, color='blue', s=100)

    sup_ind = clf.get_non_bound_indices()
    X_sup = X[sup_ind]
    x_sup, y_sup = X_sup.T

    plt.scatter(x_sup, y_sup, color='white', marker='x', s=60)
    plt.suptitle(p)
    plt.savefig(name + '_' + p['name'] + '.png')
    plt.show()
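# A hedged usage sketch for draw(): it assumes a classifier whose predict()
# accepts a single sample and which exposes get_non_bound_indices(); the
# SVCWrapper below is an illustrative assumption, not part of the original code.
from sklearn.svm import SVC

class SVCWrapper(SVC):
    def predict(self, t):
        # draw() passes points one at a time, so accept 1-D samples
        return super().predict(np.atleast_2d(t))

    def get_non_bound_indices(self):
        return self.support_  # indices of the support vectors

X_demo = np.random.randn(40, 2)
y_demo = np.where(X_demo.sum(axis=1) > 0, 1, -1)
clf_demo = SVCWrapper(kernel='linear').fit(X_demo, y_demo)
draw('svc_demo', {'name': 'linear'}, clf_demo, X_demo, y_demo, step=0.05)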
Example #2
def build_graphs(partner_info):
    # type: (DataFrame) -> Figure
    import matplotlib.pyplot as plt
    import numpy as np

    fig = plt.figure()
    ax1 = fig.add_subplot(2, 2, 1)
    ax1.hist(np.random.randn(100), bins=20, alpha=0.3)
    ax2 = fig.add_subplot(2, 2, 2)
    ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
    fig.add_subplot(2, 2, 3)  # third panel left empty
    return fig
Example #3
    def isodose_display(self):
        """
        Display isodoses on the DICOM Image.
        """
        slider_id = self.slider.value()
        curr_slice_uid = self.patient_dict_container.get("dict_uid")[slider_id]
        z = self.patient_dict_container.dataset[
            slider_id].ImagePositionPatient[2]
        dataset_rtdose = self.patient_dict_container.dataset['rtdose']
        grid = get_dose_grid(dataset_rtdose, float(z))

        if len(grid) != 0:
            x, y = np.meshgrid(np.arange(grid.shape[1]),
                               np.arange(grid.shape[0]))

            # Instantiate the isodose generator for this slice
            isodosegen = cntr.Cntr(x, y, grid)

            # sort selected_doses in ascending order so that the high dose isodose washes
            # paint over the lower dose isodose washes
            for sd in sorted(
                    self.patient_dict_container.get("selected_doses")):
                dose_level = sd * self.patient_dict_container.get("rx_dose_in_cgray") / \
                             (dataset_rtdose.DoseGridScaling * 10000)
                contours = isodosegen.trace(dose_level)
                contours = contours[:len(contours) // 2]

                polygons = self.calc_dose_polygon(
                    self.patient_dict_container.get("dose_pixluts")
                    [curr_slice_uid], contours)

                brush_color = self.iso_color[sd]
                with open(resource_path('src/data/line&fill_configuration'),
                          'r') as stream:
                    elements = stream.readlines()
                    if len(elements) > 0:
                        iso_line = int(elements[2].replace('\n', ''))
                        iso_opacity = int(elements[3].replace('\n', ''))
                        line_width = float(elements[4].replace('\n', ''))
                    else:
                        iso_line = 2
                        iso_opacity = 5
                        line_width = 2.0
                iso_opacity = int((iso_opacity / 100) * 255)
                brush_color.setAlpha(iso_opacity)
                pen_color = QtGui.QColor(brush_color.red(),
                                         brush_color.green(),
                                         brush_color.blue())
                pen = self.get_qpen(pen_color, iso_line, line_width)
                for polygon in polygons:
                    self.scene.addPolygon(polygon, pen,
                                          QtGui.QBrush(brush_color))
Example #4
def fft():
    import time

    import numpy as np

    start = time.time()
    np.fft.fft(np.exp(2j * np.pi * np.arange(10000000) / 8))
    end = time.time()
    t = end - start
    print(f'fft time: {t}')
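# time.time() has coarse resolution; a minimal sketch of the same measurement
# with time.perf_counter, which is designed for benchmarking:
import time

import numpy as np

start = time.perf_counter()
np.fft.fft(np.exp(2j * np.pi * np.arange(10000000) / 8))
print(f'fft time: {time.perf_counter() - start:.3f} s')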
Example #5
import logging
import os
import subprocess

import numpy as np


def download_graphs_from_all(path_here, path_on_server, name_server,
                             folder_name):
    number_folder = 35
    what_to_copy = ["graph.png", "graph1.png", "graph2.png", "movie.mp4"]
    folders = np.arange(0, number_folder + 1)

    for el in folders:
        logging.debug("Analysing folder " + str(el))
        for file in what_to_copy:
            remote_path = path_on_server + folder_name + "/" + str(
                el) + "/" + file
            bashCommand = "scp " + name_server + ":" + remote_path + " ."

            directory = path_here + folder_name + "/" + str(el)
            os.makedirs(directory, exist_ok=True)  # create parent and subfolder if missing

            process = subprocess.Popen(bashCommand.split(),
                                       stdout=subprocess.PIPE,
                                       cwd=directory)
            output, error = process.communicate()

            logging.debug("Copied " + str(file))
Example #6
import colorsys

import numpy as np


def gen_unique_colors(num_colors):
    # https://stackoverflow.com/questions/470690/how-to-automatically-generate-n-distinct-colors
    # Response by Uri Cohen:
    colors = []
    for i in np.arange(0.0, 360.0, 360.0 / num_colors):
        hue = i / 360.0
        lightness = (50 + np.random.rand() * 10) / 100.0
        saturation = (90 + np.random.rand() * 10) / 100.0
        colors.append(colorsys.hls_to_rgb(hue, lightness, saturation))
    return colors
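# Usage sketch: one distinct color per class in a scatter plot (assumes the
# numpy and colorsys imports above).
import matplotlib.pyplot as plt

colors = gen_unique_colors(5)
for cls, color in enumerate(colors):
    pts = np.random.randn(20, 2) + 3 * cls
    plt.scatter(pts[:, 0], pts[:, 1], color=color, label=str(cls))
plt.legend()
plt.show()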
Example #7
    def print_graph_msd_total(self, total_distance, std_distances,
                              path_where_to_save):
        x = np.arange(len(total_distance))  # one point per generation
        plt.figure(figsize=(12, 6))
        sns.set_style("darkgrid")
        plt.errorbar(x, total_distance, std_distances, elinewidth=0.5)
        plt.xlabel("Generation")
        plt.ylabel("msd")

        # plt.show()
        plt.savefig(path_where_to_save + "/msds.png", dpi=500)
        plt.close()
        logging.debug("msds saved!")
Example #8
    def __init__(
            self,
            n_people,
            reco_seed=DEFAULT_RECO_SEED,
            muta_seed=DEFAULT_MUTA_SEED,
            fake_seed=DEFAULT_FAKE_SEED):

        """Initialize with the number of people to put in the base records.
        Provodes variables for random seeds to be passed to,
          1) the random instance used to choose columns in this class
          2) the random instance used in the mutator module
          3) the random instance used in the faker module
        """

        # initialize mutator class
        self.muta = mutator.Mutator(seed=muta_seed)

        # initialize faker
        self.fake = Faker()
        self.fake.seed(fake_seed)  # on newer faker versions use Faker.seed(fake_seed)

        # initialize local random
        self.random = random.Random()
        self.random.seed(reco_seed)

        # initialize empty DataFrame
        df_base = pandas.DataFrame(
            np.empty((n_people, len(COLS))) * np.nan,
            columns=COLS)

        # add unique person ID
        df_base['id'] = np.arange(n_people)

        # generate fake data
        for i in range(n_people):
            df_base.loc[i, 'first_name'] = self.fake.first_name()
            df_base.loc[i, 'last_name'] = self.fake.last_name()
            df_base.loc[i, 'zipcode'] = self.fake.zipcode()

        self.n_people = n_people
        self.df_base = df_base

        # initialize index trackers
        self.indx_min = self.df_base.index.min()
        self.indx_max = self.df_base.index.max()

        # initialize mutation history tracker
        self.history = defaultdict(list)
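# Hypothetical usage sketch: the enclosing class name is not shown in this
# listing, so RecordBase below is an assumed name. Equal seeds should yield
# identical base records.
a = RecordBase(n_people=10, reco_seed=1, muta_seed=2, fake_seed=3)
b = RecordBase(n_people=10, reco_seed=1, muta_seed=2, fake_seed=3)
assert a.df_base.equals(b.df_base)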
Example #9
def detect_surface(profile):
    """Automatic detection of surface (begin of snowpack).

    :param profile: The profile to detect surface in.
    :return: Distance where surface was detected.
    :rtype: float
    """

    # Cut off ca. 1 mm
    distance = profile.samples.distance.values[250:]
    force = profile.samples.force.values[250:]

    force = downsample(force, 20)
    distance = downsample(distance, 20)

    force = smooth(force, 242)

    y_grad = np.gradient(force)
    y_grad = downsample(y_grad, 3)
    x_grad = downsample(distance, 3)

    max_force = np.amax(force)

    try:
        for i in np.arange(100, x_grad.size):
            std = np.std(y_grad[:i - 1])
            mean = np.mean(y_grad[:i - 1])
            if y_grad[i] >= 5 * std + mean:
                surface = x_grad[i]
                break

        if i == x_grad.size - 1:
            # threshold never reached; fall back to the maximum force
            surface = max_force

        log.info('Detected surface at {:.3f} mm in profile {}'.format(
            surface, profile))
        return surface

    except ValueError:
        log.warning('Failed to detect surface')
        return max_force
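# A self-contained sketch of the detection rule above: flag the first index
# whose gradient exceeds the running mean plus five standard deviations of
# all preceding gradient values (synthetic data, illustrative only).
import numpy as np

np.random.seed(0)
force_demo = np.concatenate([np.random.normal(0, 0.001, 500),
                             np.linspace(0, 5, 200)])  # noise floor, then a ramp
grad = np.gradient(force_demo)
for i in range(100, grad.size):
    if grad[i] >= np.mean(grad[:i - 1]) + 5 * np.std(grad[:i - 1]):
        print('onset detected at sample', i)  # ~500, where the ramp starts
        break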
Example #10
plt.figure(figsize=(5, 5))
image = image[0] if image.shape[-1] == 3 else image[0, ..., 0]
cmap = None if image.shape[-1] == 3 else 'gray'
plt.imshow(image, cmap=cmap, interpolation='none')
for idx, jdx in enumerate(data_generator.graph):
    if jdx > -1:
        x1 = keypoints[0, idx, 0]
        x2 = keypoints[0, jdx, 0]
        if (0 <= x1 <= IMAGE_SIZE[0]) and (0 <= x2 <= IMAGE_SIZE[0]):
            plt.plot(
                [keypoints[0, idx, 0], keypoints[0, jdx, 0]],
                [keypoints[0, idx, 1], keypoints[0, jdx, 1]],
                'r-'
            )

plt.scatter(keypoints[0, :, 0], keypoints[0, :, 1],
            c=np.arange(data_generator.keypoints_shape[0]),
            s=50, cmap=plt.cm.hsv, zorder=3)

plt.show()

# Augmentation

augmenter = []

augmenter.append(FlipAxis(data_generator, axis=0))  # flip image up-down
augmenter.append(FlipAxis(data_generator, axis=1))  # flip image left-right

sometimes = []
sometimes.append(iaa.Affine(scale={"x": (0.95, 1.05), "y": (0.95, 1.05)},
                            translate_percent={'x': (-0.05, 0.05), 'y': (-0.05, 0.05)},
                            shear=(-8, 8),
                            order=ia.ALL))  # closed minimally here; the original listing truncates this call
Example #11
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.DEBUG)

    # first_path = "/Volumes/TheMaze/TuringLearning/february/15feb/Experiment-plusplusGeolife/"
    first_path = "/Volumes/TheMaze/TuringLearning/february/snow/Experiment-plusplus/"

    # read all the trajectories for the distance to the real point
    logging.debug("Checking the distances...")
    real_distances, real_distances_bearing = analise_distances(
        first_path, "0/", True)
    # 0 np.max(array), 1 np.min(array), 2 np.mean(array), 3 np.std(array), 4 np.median(array)

    # single graphs
    trajectories = len(real_distances[0].keys())
    num = np.arange(0, trajectories)
    real_total = []
    number_tra = []
    for tra in range(trajectories):
        x = np.arange(0, len(real_distances))
        median = []
        for el in real_distances:
            median.append(np.median(np.array(el[tra][1])))

        median_bearing = []
        for el in real_distances_bearing:
            median_bearing.append(np.median(np.array(el[tra][1])))

        total_sum_median = []
        for i in range(len(median)):
            pass  # the original listing truncates the body of this loop
Example #12
# dpi sets the figure resolution in dots per inch (default 80); one inch is 2.54 cm, and A4 paper is 21 x 30 cm
# facecolor: background color
# edgecolor: border color
# frameon: whether to draw the figure frame
# figure = plt.figure(num='gg', figsize=(5, 3), dpi=None, facecolor='red', edgecolor=None, frameon=True)
# plt.show()

# Create a grid of subplots
# nrows: number of subplot rows
# ncols: number of subplot columns
# sharex: all subplots share the same x-axis ticks (changing xlim affects every subplot)
# sharey: all subplots share the same y-axis ticks (changing ylim affects every subplot)
# subplot_kw: dict of keywords used to create each subplot
# **fig_kw: additional keyword arguments for creating the figure
# plt.subplots(nrows, ncols, sharex, sharey, subplot_kw, **fig_kw)
import matplotlib.pyplot as plt
import numpy as np

x = np.arange(0, 100)
# first row, left plot
# plot 1
plt.subplot(221)
plt.plot(x, x)
# plot 2
plt.subplot(222)
plt.plot(x, -x)
# plot 3
plt.subplot(223)
plt.plot(x, x**2)
plt.grid(color='r', linestyle='--', linewidth=1, alpha=0.3)
# plot 4
plt.subplot(224)
plt.plot(x, np.log(x))  # note: log(0) is -inf, so the first point is undefined
plt.show()
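# The comments above document the plt.subplots signature, while the demo uses
# the stateful plt.subplot; a minimal equivalent with the Axes interface:
fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True)
axes[0, 0].plot(x, x)
axes[0, 1].plot(x, -x)
axes[1, 0].plot(x, x**2)
axes[1, 0].grid(color='r', linestyle='--', linewidth=1, alpha=0.3)
axes[1, 1].plot(x, np.log(x))
plt.show()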
Example #13
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(figsize=(20, 8), dpi=80)
x = np.arange(2, 26, 2)
# positions of the data on the x axis; any iterable
y = [15, 13, 14, 5, 17, 20, 25, 26, 26, 17, 22, 1]
# positions of the data on the y axis; any iterable
# draw the line
plt.plot(x, y)
# set the x-axis tick marks
plt.xticks(range(2, 25))
# save the figure
plt.savefig("./t1.png")
if __name__ == '__main__':
    print(1)


Example #14
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier


def gbc_overfitting(date_file, X_train, y_train, X_test, y_test):
    n_gb = []
    score_gb = []
    time_gb = []
    n_gbes = []
    score_gbes = []
    time_gbes = []

    n_estimators = 100

    # Stop fitting additional stages if the score does not improve by at
    # least 0.01 over the last 10 stages
    gbes = GradientBoostingClassifier(n_estimators=n_estimators,
                                      validation_fraction=0.2,
                                      n_iter_no_change=10,
                                      tol=0.01,
                                      random_state=0)
    gb = GradientBoostingClassifier(n_estimators=n_estimators, random_state=0)
    start = time.time()
    gb.fit(X_train, y_train)
    time_gb.append(time.time() - start)

    start = time.time()
    gbes.fit(X_train, y_train)
    time_gbes.append(time.time() - start)

    score_gb.append(gb.score(X_test, y_test))
    score_gbes.append(gbes.score(X_test, y_test))

    n_gb.append(gb.n_estimators_)
    n_gbes.append(gbes.n_estimators_)

    bar_width = 0.2
    n = 1
    index = np.arange(0, n * bar_width, bar_width) * 2.5
    index = index[0:n]

    plt.figure(figsize=(9, 5))

    bar1 = plt.bar(index,
                   score_gb,
                   bar_width,
                   label='Without early stopping',
                   color='crimson')
    bar2 = plt.bar(index + bar_width,
                   score_gbes,
                   bar_width,
                   label='With early stopping',
                   color='coral')

    plt.xticks(index + bar_width, date_file)
    plt.yticks(np.arange(0, 1.3, 0.1))

    def autolabel(rects, n_estimators):
        """
        Attach a text label above each bar displaying n_estimators of each model
        """
        for i, rect in enumerate(rects):
            plt.text(rect.get_x() + rect.get_width() / 2.,
                     1.05 * rect.get_height(),
                     'n_est=%d' % n_estimators[i],
                     ha='center',
                     va='bottom')

    autolabel(bar1, n_gb)
    autolabel(bar2, n_gbes)

    plt.ylim([0, 1.3])
    plt.legend(loc='best')
    plt.grid(True)

    plt.xlabel('Datasets')
    plt.ylabel('Test score')

    plt.show()
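# Usage sketch on a toy dataset; make_classification and the split sizes are
# illustrative assumptions, not part of the original code.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
gbc_overfitting(['toy'], X_train, y_train, X_test, y_test)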
Example #15
                                                        random_state=1)
    model = LinearRegression()
    model.fit(x_train, y_train)
    print(model.coef_, model.intercept_)

    order = y_test.argsort(axis=0)
    y_test = y_test.values[order]
    x_test = x_test.values[order, :]
    y_test_pred = model.predict(x_test)
    mse = np.mean((y_test_pred - np.array(y_test))**2)
    rmse = np.sqrt(mse)
    mse_sys = mean_squared_error(y_test, y_test_pred)
    print('MSE= ', mse, end=' ')
    print('MSE(System Function) = ', mse_sys, end=' ')
    print(mean_absolute_error(y_test, y_test_pred))

    error = y_test - y_test_pred
    np.set_printoptions(suppress=True)
    print('error = ', error)
    plt.hist(error, bins=20, color='g', alpha=0.6, edgecolor='k')
    plt.show()

    plt.figure(facecolor='w')
    t = np.arange(len(x_test))
    plt.plot(t, y_test, 'r-', linewidth=2, label='ground truth')
    plt.plot(t, y_test_pred, 'g-', linewidth=2, label='prediction')
    plt.legend(loc='upper left')
    plt.title('Linear regression sales prediction', fontsize=18)
    plt.grid(True, ls=':')
    plt.show()
Example #16
    def run(self):
        folders = how_many_fatherFolder(self.path)
        folders = [s for s in folders if not re.search('txt', s)]
        folders = [s for s in folders if not re.search('jpg', s)]
        folders = [s for s in folders if not re.search('png', s)]

        for experiment in folders:
            logging.debug("Folder under analysis -> " + str(experiment))
            second_path = self.path + experiment + "/"
            res = how_many_folder(second_path)
            res = [s for s in res if not re.search('txt', s)]
            res = [s for s in res if not re.search('jpg', s)]
            res = [s for s in res if not re.search('png', s)]
            num_folder = len(res)
            logging.debug("Folders to analyse -> " + str(num_folder))

            for el in res:
                logging.debug("Folder under analysis -> " + str(el))
                path_here = second_path + str(el) + "/"

                names = []
                for i in os.listdir(path_here):
                    if os.path.isfile(
                            os.path.join(path_here, i)
                    ) and 'trajectory-generate-aSs-' in i and ".zip" in i:
                        names.append(i)

                names = sorted_nicely(names)

                pops = Populations()
                # find the trajectories ID and Points
                trajectories = self.read_trajectory_info(path_here +
                                                         "trajectory.zip")
                for tra in trajectories:
                    pops.add_population(Population(tra))

                # analysing the fitness
                logging.debug("Analysing the fitness...")
                max_agent, max_classifier = self.find_max_values_fitness(
                    path_here)
                agent_generations_info, classifier_generations_info = self.read_fitness(
                    path_here, max_agent, max_classifier)

                x = np.arange(len(agent_generations_info))
                y_agent = []
                std_agent = []
                for element in agent_generations_info:
                    y_agent.append(element.mean)
                    std_agent.append(element.std)
                y_classifier = []
                std_classifier = []
                for element in classifier_generations_info:
                    y_classifier.append(element.mean)
                    std_classifier.append(element.std)

                # print fitness
                self.print_fitnes(x, y_agent, std_agent, y_classifier,
                                  std_classifier, path_here)

                total_distances = []
                total_distances_msd = []
                std_distances = []
                last_generations_values = []
                logging.debug("Analysing Trajectories...")
                for i in tqdm.tqdm(range(len(names))):
                    name = names[i]

                    # obtain info from the file
                    individuals = self.read_info(path_here + name)

                    if i == len(names) - 1:
                        for ind in individuals:
                            for el in ind.array:
                                last_generations_values.append(el)

                    msds = []
                    for ind in individuals:
                        msds.append(ind.MSD)
                    total_distances.append(np.mean(np.array(msds)))
                    std_distances.append(np.std(np.array(msds)))

                    # store the msd per trajectory
                    distance_per_trajectories = {}
                    for j in range(len(trajectories)):  # one entry per trajectory
                        distances = []
                        for indiv in individuals:
                            if indiv.trajectoryID == pops.get_population(
                                    j).tra.trajectoryID:
                                distances.append(indiv.MSD)

                        array = np.array(distances)
                        MSD = (np.sum(array)) / len(array)
                        distance_per_trajectories.update({j: MSD})
                    total_distances_msd.append(distance_per_trajectories)

                # print graph msd per trajectory
                self.print_graph_msd_per_trajectory(total_distances_msd,
                                                    path_here)

                # print graph total msd
                self.print_graph_msd_total(total_distances, std_distances,
                                           path_here)

                # save the last value
                array = np.array(last_generations_values)
                MSD = (np.sum(array)) / len(array)

                with open(path_here + "/MSD.txt", "w") as text_file:
                    text_file.write(str(MSD))
Example #17
        #     lng_last.append(el[1])

        # real_bearing = computeBearing(lat_last[len(lat_last) -1], lng_last[len(lat_last) -1], lat_real[0], lng_real[0])
        #
        # distances = []
        # # compute distance
        # for i in range(len(lat_generated)):
        #     # compute the distances
        #     bearing = computeBearing(lat_last[len(lat_last) -1], lng_last[len(lat_last) -1], lat_generated[i], lng_generated[i])
        #     distances.append(fabs(bearing - real_bearing))
        #
        # array = np.array(distances)
        # real_distances.append((np.max(array), np.min(array), np.mean(array), np.std(array)))

    x = np.arange(0, len(total_distances))
    max_value = []
    min_value = []  # renamed to avoid shadowing the builtin min
    mean = []
    std = []
    for el in total_distances:
        a = []
        b = []
        c = []
        d = []
        for k in el.keys():
            a.append(el[k][0])
            b.append(el[k][1])
            c.append(el[k][2])
            d.append(el[k][3])
        max_value.append(np.mean(np.array(a)))
Example #18
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.DEBUG)

    num = [3]
    for el in num:
        path = "/Volumes/TheMaze/TuringLearning/Experiment Christmas Holidays/Experiment-commalambda100/" + str(
            el) + "/"
        files = 0
        for i in os.listdir(path):
            if os.path.isfile(os.path.join(
                    path,
                    i)) and 'trajectory-generatedPoints-' in i and ".zip" in i:
                files += 1

        max_files = files
        vect = np.arange(1, max_files + 1)
        for numb in vect:
            name = "trajectory-generatedPoints-" + str(numb) + "-" + str(
                numb) + ".zip"
            trajectories_label, json_file = reanInfo(path + name)

            # center lat, center lng, zoom
            lat_real = []
            lng_real = []
            for el in json_file[trajectories_label[0]]["real"]:
                lat_real.append(el[0])
                lng_real.append(el[1])

            gmap = gmplot.GoogleMapPlotter(lat_real[0], lng_real[0], 20)

            for el in trajectories_label:
Example #19
    all_real_distances = []
    all_bearing_distances = []
    for folder in folders:
        logging.debug("Analysing folder " + str(folder))

        real_path = path + "/" + str(folder) + "/"

        files = 0
        for i in os.listdir(real_path):
            if os.path.isfile(os.path.join(
                    real_path,
                    i)) and 'trajectory-generatedPoints-' in i and ".zip" in i:
                files += 1

        vect = np.arange(1, files + 1)

        # read all the trajectories for the distance to the real point
        logging.debug("Checking the distances")
        real_distances, real_distances_bearing = analise_distances(
            path, folder)

        median = []
        std = []
        x = np.arange(0, len(real_distances))
        for el in real_distances:
            median.append(el[2])
            std.append(el[3])

        all_real_distances.append((x, median, std))
Example #20
                # read the fitness and return it scaled 0-1
                logging.debug("Checking the fitness...")
                scaled_version_agent, gen_agent, scaled_version_classifier, gen_classifier, ok = analise_single_folder(
                    path, folder, max_agent, max_classifier)

                if not ok:
                    problems.append((experiemnt, folder,
                                     "Problem with number of fitness values"))

                # read all the trajectories for the distance to the real point
                logging.debug("Checking the distances...")
                real_distances, real_distances_bearing = analise_distances(
                    path, folder, bigOrSmall)

                x = np.arange(0, len(real_distances))
                max_value = []
                min_value = []
                mean = []
                std = []
                for el in real_distances:
                    a = []
                    b = []
                    c = []
                    d = []
                    for k in el.keys():
                        a.append(el[k][0])
                        b.append(el[k][1])
                        c.append(el[k][2])
                        d.append(el[k][3])
                    max_value.append(np.mean(np.array(a)))

Example #21
# and the transformation is applied on the test data for later use.
# The train data will be transformed while it is being fit.
y_test_binary = pd.DataFrame(y_test["value"].apply(getBinary))

regressorLow = XGBRegressor(gamma=0.0,
                            n_estimators=200,
                            base_score=0.5,
                            colsample_bytree=0.7,
                            learning_rate=0.2,
                            max_depth=5,
                            objective="reg:linear")
xgbModelLow = regressorLow.fit(X_train, y_train.value)
xgboost.plot_importance(xgbModelLow)
y_predicted = xgbModelLow.predict(X_test)

y_predicted_binary = [1 if yp >= 0.5 else 0 for yp in y_predicted]

print(accuracy_score(y_test_binary, y_predicted_binary))

fig = plt.figure(figsize=(8, 8))
plt.xticks(rotation='vertical')
y_pos = np.arange(len(xgbModelLow.feature_importances_))
plt.barh(y_pos,
         xgbModelLow.feature_importances_.tolist(),
         align='center',
         alpha=0.4)
plt.yticks(y_pos, X_test.columns)
plt.show()
Example #22
        'Virginia Beach', 'Baltimore', 'Denver', 'Detroit', 'San Antonio',
        'Phoenix', 'Oklahoma City', 'Indianapolis', 'Milwaukee', 'Sacramento',
        'Washington, D.C.', 'Colorado Springs', 'Honolulu', 'Nashville',
        'Jacksonville', 'Louisville', 'Seattle', 'Memphis', 'Fresno', 'Boston',
        'Minneapolis', 'San Jose', 'Tulsa', 'Charlotte', 'San Diego',
        'Los Angeles', 'Long Beach', 'Cleveland', 'San Francisco',
        'Albuquerque', 'Arlington, TX', 'Omaha', 'Wichita', 'Las Vegas'
    ]

    grad = DataFrame({'change': change, 'city': city})

    plt.figure(figsize=(3, 8))

    change = grad.change[grad.change > 0]
    city = grad.city[grad.change > 0]
    pos = np.arange(len(change))

    plt.title('1995-2005 Change in HS graduation rate')
    plt.barh(pos, change)

    # add the numbers to the side of each bar
    for p, c, ch in zip(pos, city, change):
        plt.annotate(str(ch), xy=(ch + 1, p + .5), va='center')

    # customize ticks
    ticks = plt.yticks(pos + .5, city)
    xt = plt.xticks()[0]
    plt.xticks(xt, [' '] * len(xt))

    # minimize chartjunk
    remove_border(left=False, bottom=False)
Example #23
    def __init__(self, pnt_file, name=None):
        self._pnt_file = pathlib.Path(pnt_file)
        # Load pnt file, returns header (dict) and raw samples
        self._pnt_header, pnt_samples = Pnt.load(self._pnt_file)

        # Get clean WGS84 coordinates (use +/- instead of N/E)
        self._latitude = self.pnt_header_value(Pnt.Header.GPS_WGS84_LATITUDE)
        self._longitude = self.pnt_header_value(Pnt.Header.GPS_WGS84_LONGITUDE)
        north = self.pnt_header_value(Pnt.Header.GPS_WGS84_NORTH)
        east = self.pnt_header_value(Pnt.Header.GPS_WGS84_EAST)
        if north.upper() != 'N':
            self._latitude = -self._latitude
        if east.upper() != 'E':
            self._longitude = -self._longitude
        if abs(self._latitude) > 90:
            log.warning('Latitude value {} invalid, replacing by None'.format(
                self._latitude))
            self._latitude = None
        if abs(self._longitude) > 180:
            log.warning('Longitude value {} invalid, replacing by None'.format(
                self._longitude))
            self._longitude = None

        # Get a proper timestamp by putting pnt entries together
        self._timestamp = None
        year = self.pnt_header_value(Pnt.Header.TIMESTAMP_YEAR)
        month = self.pnt_header_value(Pnt.Header.TIMESTAMP_MONTH)
        day = self.pnt_header_value(Pnt.Header.TIMESTAMP_DAY)
        hour = self.pnt_header_value(Pnt.Header.TIMESTAMP_HOUR)
        minute = self.pnt_header_value(Pnt.Header.TIMESTAMP_MINUTE)
        second = self.pnt_header_value(Pnt.Header.TIMESTAMP_SECOND)
        try:
            self._timestamp = datetime(year,
                                       month,
                                       day,
                                       hour,
                                       minute,
                                       second,
                                       tzinfo=pytz.UTC)
            log.info(
                'Timestamp of profile as reported by pnt header is {}'.format(
                    self.timestamp))
        except ValueError:
            log.warning('Unable to build timestamp from pnt header fields')

        # Set name of profile (by default an entry from the pnt header)
        self._name = self.pnt_header_value(Pnt.Header.FILENAME)
        if name:
            self._name = name

        # Get other important entries from header
        self._samples_count = self.pnt_header_value(
            Pnt.Header.SAMPLES_COUNT_FORCE)
        self._spatial_resolution = self.pnt_header_value(
            Pnt.Header.SAMPLES_SPATIALRES)
        self._overload = self.pnt_header_value(Pnt.Header.SENSOR_OVERLOAD)
        self._speed = self.pnt_header_value(Pnt.Header.SAMPLES_SPEED)

        self._smp_serial = str(self.pnt_header_value(Pnt.Header.SMP_SERIAL))
        self._smp_firmware = str(self.pnt_header_value(
            Pnt.Header.SMP_FIRMWARE))
        self._smp_length = self.pnt_header_value(Pnt.Header.SMP_LENGTH)
        self._smp_tipdiameter = self.pnt_header_value(
            Pnt.Header.SMP_TIPDIAMETER)
        self._gps_pdop = self.pnt_header_value(Pnt.Header.GPS_PDOP)
        self._gps_numsats = self.pnt_header_value(Pnt.Header.GPS_NUMSATS)
        self._amplifier_range = self.pnt_header_value(
            Pnt.Header.AMPLIFIER_RANGE)
        self._amplifier_serial = self.pnt_header_value(
            Pnt.Header.AMPLIFIER_SERIAL)
        self._sensor_serial = self.pnt_header_value(Pnt.Header.SENSOR_SERIAL)
        self._sensor_sensivity = self.pnt_header_value(
            Pnt.Header.SENSOR_SENSITIVITIY)

        # Create a pandas dataframe with distance and force
        distance_arr = np.arange(
            0, self._samples_count) * self._spatial_resolution
        factor = self.pnt_header_value(Pnt.Header.SAMPLES_CONVFACTOR_FORCE)
        force_arr = np.asarray(pnt_samples) * factor
        stacked = np.column_stack([distance_arr, force_arr])
        self._samples = pd.DataFrame(stacked, columns=('distance', 'force'))

        self._ini = configparser.ConfigParser()

        # Look out for corresponding ini file

        self._ini_file = self._pnt_file.with_suffix('.ini')
        if self._ini_file.exists():
            log.info('Reading ini file {} for {}'.format(self._ini_file, self))
            self._ini.read(self._ini_file)

        # Ensure a section called 'markers' does exist
        if not self._ini.has_section('markers'):
            self._ini.add_section('markers')

        # Check for invalid values (non floats) in 'markers' section
        for k, v in self._ini.items('markers'):
            try:
                float(v)
                log.info('Marker: {}={}'.format(k, v))
            except ValueError:
                log.warning(
                    'Ignoring value {} for marker {}, not float value'.format(
                        repr(v), repr(k)))
                self._ini.remove_option('markers', k)
Example #24
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

sns.set_style("whitegrid")
plt.plot(np.arange(10))
plt.show()
Example #25
    def plot_dvh(self):
        """
        :return: DVH plot using Matplotlib library.
        """

        # Initialisation of the plots
        fig, ax = plt.subplots()
        fig.subplots_adjust(0.1, 0.15, 1, 1)
        # Maximum value for x axis
        max_xlim = 0

        # Plot for all the ROIs selected in the left column of the window
        for roi in self.selected_rois:
            dvh = self.raw_dvh[int(roi)]

            # Plot only the ROIs whose volume is not zero
            if dvh.volume != 0:
                # Bincenters, obtained from the dvh object, give the x axis values
                # (Doses originally in Gy unit)
                bincenters = self.dvh_x_y[roi]['bincenters']
                #print(self.dvh_x_y[roi])

                # Counts, obtained from the dvh object, give the y axis values
                # (values between 0 and dvh.volume)
                counts = self.dvh_x_y[roi]['counts']

                # Color of the line is the same as the color shown in the left column of the window
                color = self.patient_dict_container.get("roi_color_dict")[roi]
                color_R = color.red() / 255
                color_G = color.green() / 255
                color_B = color.blue() / 255

                plt.plot(100 * bincenters,
                         100 * counts / dvh.volume,
                         label=dvh.name,
                         color=[color_R, color_G, color_B])

                # Update the maximum value for x axis (usually different between ROIs)
                if (100 * bincenters[-1]) > max_xlim:
                    max_xlim = 100 * bincenters[-1]

                plt.xlabel('Dose [%s]' % 'cGy')
                plt.ylabel('Volume [%s]' % '%')
                if dvh.name:
                    plt.legend(loc='lower center', bbox_to_anchor=(0, 1, 5, 5))

        # Set the range values for x and y axis
        ax.set_ylim([0, 105])
        ax.set_xlim([0, max_xlim + 3])

        # Create the grids on the plot
        major_ticks_y = np.arange(0, 105, 20)
        minor_ticks_y = np.arange(0, 105, 5)
        major_ticks_x = np.arange(0, max_xlim + 250, 1000)
        minor_ticks_x = np.arange(0, max_xlim + 250, 250)
        ax.set_xticks(major_ticks_x)
        ax.set_xticks(minor_ticks_x, minor=True)
        ax.set_yticks(major_ticks_y)
        ax.set_yticks(minor_ticks_y, minor=True)
        ax.grid(which='minor', alpha=0.2)
        ax.grid(which='major', alpha=0.5)

        # Add the legend at the bottom left of the graph
        if len(self.selected_rois) != 0:
            ax.legend(loc='upper left', bbox_to_anchor=(-0.1, -0.15), ncol=4)

        plt.subplots_adjust(bottom=0.3)

        return fig
Example #26
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
status_id_df = result[['Positivity', 'status_id']].drop_duplicates().sort_values('status_id')
status_to_id = dict(status_id_df.values)
id_to_status = dict(status_id_df[['status_id', 'Positivity']].values)
result.head()

def accuracy_summary(pipeline, X_train, y_train, X_test, y_test):
    sentiment_fit = pipeline.fit(X_train, y_train)
    y_pred = sentiment_fit.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("\n\nΒαθμολογία Ακρίβειας: {0:.2f}%".format(accuracy*100))
    return accuracy

cv = CountVectorizer()
rf = RandomForestClassifier(class_weight="balanced")
n_features = np.arange(10000, 25001, 5000)

def nfeature_accuracy_checker(vectorizer=cv, n_features=n_features, stop_words=None, ngram_range=(1, 1), classifier=rf):
    result = []
    print(classifier)
    print("\n")
    for n in n_features:
        vectorizer.set_params(stop_words=stop_words, max_features=n,
                              ngram_range=ngram_range)  # honor the stop_words parameter instead of a global
        checker_pipeline = Pipeline([
            ('vectorizer', vectorizer),
            ('classifier', classifier)
        ])
        print("Αποτελέσματα Ελέγχου για {} χαρακτηριστικά".format(n))
        nfeature_accuracy = accuracy_summary(checker_pipeline, X_train, y_train, X_test, y_test)
        result.append((n, nfeature_accuracy))
    return result
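# Usage sketch (X_train/y_train/X_test/y_test are assumed to be defined
# earlier in the original script):
feature_result = nfeature_accuracy_checker(ngram_range=(1, 2))
for n, acc in feature_result:
    print('{} features -> {:.2f}% accuracy'.format(n, acc * 100))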
Example #27
def allTheAgents(path):
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.DEBUG)

    names = []
    for i in os.listdir(path):
        if os.path.isfile(os.path.join(
                path, i)) and 'trajectory-generate-aSs-' in i and ".zip" in i:
            names.append(i)

    names = sorted_nicely(names)

    total_distances = []
    numb = 0
    logging.debug("Analysing Trajectories...")
    for i in tqdm.tqdm(range(len(names))):
        name = names[i]
        numb += 1
        # name = "trajectory-generatedPoints-" + str(numb) + "-" + str(numb) + ".zip"

        trajectories_label, json_file, id_label = reanInfo(path + name)
        # number = 0
        # while number < len(id_label):

        # real points
        lat_real = []
        lng_real = []
        # generated points
        lat_generated = []
        lng_generated = []

        label_real = []
        label_generated = []
        for labels in trajectories_label:
            for el in json_file[labels]["real"]:
                if el[0] not in lat_real:
                    lat_real.append(el[0])
                    lng_real.append(el[1])
                    label_real.append(json_file[labels]["id"])

            for el in json_file[labels]["generated"]:
                if el[0] not in lat_generated:
                    lat_generated.append(el[0])
                    lng_generated.append(el[1])
                    label_generated.append(json_file[labels]["id"])

        distance_per_trajectories = {}
        # now, for every trajectory, compute the distances of its generated points
        for i in range(len(label_real)):
            index = [
                j for j, x in enumerate(label_generated) if x == label_real[i]
            ]
            distances = []
            for ind in index:
                a = np.array((lat_real[i], lng_real[i]))
                b = np.array((lat_generated[ind], lng_generated[ind]))
                value = np.linalg.norm(a - b) * 100000
                value = pow(value, 2)
                distances.append(value)

            array = np.array(distances)
            distance_per_trajectories.update({i: array})
        total_distances.append(distance_per_trajectories)

    df = DataFrame(columns=['gen', 'tra', "ind", 'distance'])

    x = np.arange(0, len(total_distances))
    i = 0
    number_of_trajectories = 0
    for el in total_distances:
        number_of_trajectories = len(el.keys())
        for k in el.keys():
            array = el[k]
            q = len(array)

            for qq in range(q):
                d = {"gen": i, "tra": k, "ind": qq, "distance": array[qq]}
                dfs = DataFrame(data=d, index=[i])
                df = df.append(dfs)  # pandas < 2.0 API; use pd.concat on newer pandas
        i += 1
    sns.set_style("darkgrid")
    df = df[df.columns].astype(float)

    # g = sns.lmplot(x="gen", y="MSD", hue="tra", data=df, scatter_kws={"s": 1}, fit_reg=False)
    # g.set(ylim=(0, 0.0000004))

    for tra in range(number_of_trajectories):
        a = df.loc[df['tra'] == tra]
        g = sns.lmplot(x="gen",
                       y="distance",
                       hue="ind",
                       data=a,
                       scatter_kws={"s": 1},
                       fit_reg=False)
        g.set(ylim=(0, 120))

    plt.show()