Example #1
0
def main():
    """Read 'stocks.dat' and run problem1a, problem1b and problem1c on it.

    The header and data returned by read_comma_separated_file are handed,
    unchanged, to each problem function in turn.
    """
    columns, rows = read_comma_separated_file('stocks.dat')

    # Each part receives the same (header, data) pair, in a/b/c order.
    for solve_part in (problem1a, problem1b, problem1c):
        solve_part(columns, rows)
Example #2
0
def main():
    """Plot release year and transistor count for each processor.

    Reads "microprocessors.dat" with read_comma_separated_file and uses the
    first three fields of every row as processor name, release year (parsed
    as int) and transistor count (parsed as int). Two panels are drawn side
    by side with one y position per processor, ordered by release year:

    * left panel: release year, with processor names as y tick labels;
    * right panel: transistor count on a logarithmic x axis, with the y
      tick labels blanked.

    The figure is shown interactively; nothing is returned.
    """
    _header, data = read_comma_separated_file("microprocessors.dat")

    data_processor = [row[0] for row in data]
    data_year = np.array([int(row[1]) for row in data])
    data_transistors = np.array([int(row[2]) for row in data])

    # Order the rows by sorting indices on year instead of keying a dict on
    # the year: a dict keeps only one processor per release year, which
    # leaves fewer x values than y positions and makes plot() fail.
    # mergesort is stable, so processors from the same year keep file order.
    order = np.argsort(data_year, kind='mergesort')
    ordered_years = data_year[order]
    ordered_names = [data_processor[i] for i in order]
    ordered_transistor_data = data_transistors[order]

    y = np.arange(len(data_processor))
    fig, (ax1, ax2) = plt.subplots(1, 2)

    # Left panel: one marker per processor at its release year.
    ax1.plot(ordered_years, y, 'ko')
    ax1.set_yticks(y)
    ax1.set_yticklabels(ordered_names)
    ax2.set_yticklabels([])

    # Right panel: same row order, transistor counts on a log scale.
    ax2.plot(ordered_transistor_data, y, 'ko')
    ax2.set_yticks(y)
    ax2.set_xscale('log')
    # Pad the x range so the extreme points do not sit on the frame.
    ax2.set_xlim(min(ordered_transistor_data) * .5,
                 max(ordered_transistor_data) * 1.5)

    ax1.margins(.05, .1)
    ax2.margins(0, .1)
    ax1.grid(True)
    ax2.grid(True, which='both')
    ax1.tick_params(direction='out')
    ax2.tick_params(direction='out', which="both")
    ax1.set_title("Processor Year Released")
    ax2.set_title("Processor Transistors")
    ax1.set_xlabel('Year')
    ax2.set_xlabel('Number of Transistors')

    fig.tight_layout()
    fig.patch.set_facecolor('white')
    plt.show()
Example #3
0
def main():
    """Draw a 4x4 grid of pairwise scatter plots for the data in "genes.dat".

    Reads "genes.dat" with read_comma_separated_file and converts the first
    four fields of every row to float. Panel ax[x, y] plots column x on the
    horizontal axis against column y on the vertical axis and is titled
    from GENE_TITLE. Red polynomial fits of column 0 against columns 2, 3
    and 1 (degree 1, 3 and 5 respectively) are overlaid on the matching
    panels of the first row.

    The figure is saved as 'problem4.png' and then shown interactively;
    nothing is returned.
    """
    _header, data = read_comma_separated_file("genes.dat")

    n_cols = 4
    arr_data = np.zeros([len(data), n_cols])
    for i, row in enumerate(data):
        # Index each field explicitly so a short row still raises IndexError.
        arr_data[i] = [float(row[c]) for c in range(n_cols)]

    fig, ax = plt.subplots(n_cols, n_cols, sharex=True, sharey=True)

    # range (not the Python-2-only xrange) keeps this runnable on Python 3.
    for x in range(n_cols):
        for y in range(n_cols):
            ax[x, y].plot(arr_data[:, x], arr_data[:, y], '.')
            ax[x, y].margins(.1, .1)
            ax[x, y].set_title(GENE_TITLE[x] + " vs. " + GENE_TITLE[y])

    # Evaluate every fitted curve on the same 100 points spanning column 0.
    a_x = np.linspace(arr_data[:, 0].min(), arr_data[:, 0].max(), 100)

    # (column fitted against column 0, polynomial degree)
    fits = (
        (2, 1),  # C is the best fit: run a linear regression on it.
        (3, 3),  # D is the second best correlation: cubic.
        (1, 5),  # B is not really correlated at all: order 5.
    )
    for col, degree in fits:
        fit = np.poly1d(np.polyfit(arr_data[:, 0], arr_data[:, col], degree))
        ax[0, col].plot(a_x, fit(a_x), '-', color='r')

    fig.tight_layout()
    fig.patch.set_facecolor('white')
    fig.suptitle('Gene Correlations', fontsize=20)
    # Leave room above the grid for the suptitle.
    plt.subplots_adjust(top=0.85)
    fig.savefig('problem4.png')
    plt.show()