def main():
    """Load ``stocks.dat`` and run every part of problem 1 on it.

    The file is parsed once with ``read_comma_separated_file``; the
    resulting header and data are then handed, unchanged, to
    ``problem1a``, ``problem1b`` and ``problem1c`` in that order.
    """
    header, rows = read_comma_separated_file('stocks.dat')

    # Each part receives the same (header, data) pair.
    for solve_part in (problem1a, problem1b, problem1c):
        solve_part(header, rows)
def main():
    """Plot release year and transistor count for each microprocessor.

    Reads ``microprocessors.dat`` with ``read_comma_separated_file`` and
    uses the first three columns of every row as (processor name, year,
    transistor count).  Two side-by-side dot plots are drawn with one
    row per processor, ordered by release year:

    * left:  release year, with the processor names as y tick labels;
    * right: transistor count on a logarithmic x axis, sharing the same
      row order (its y tick labels are hidden).

    The figure is shown interactively; nothing is written to disk.
    """
    _, data = read_comma_separated_file("microprocessors.dat")

    names = [row[0] for row in data]
    years = np.array([int(row[1]) for row in data])
    transistors = np.array([int(row[2]) for row in data])

    # Order rows by release year.  A stable sort keeps file order for
    # processors released in the same year.  (Routing the ordering through
    # a year -> name dict silently dropped such rows, leaving fewer x
    # values than y positions and making the plot call fail.)
    order = np.argsort(years, kind='mergesort')
    ordered_names = [names[i] for i in order]
    ordered_years = years[order]
    ordered_transistors = transistors[order]

    # One horizontal row per processor.
    y = np.arange(len(names))

    fig, (ax1, ax2) = plt.subplots(1, 2)

    # Left panel: release year, labelled with the processor names.
    ax1.plot(ordered_years, y, 'ko')
    ax1.set_yticks(y)
    ax1.set_yticklabels(ordered_names)

    # Right panel: transistor counts on a log scale.  The rows line up
    # with the left panel, so the names are not repeated here.
    ax2.set_yticklabels([])
    ax2.plot(ordered_transistors, y, 'ko')
    ax2.set_yticks(y)
    ax2.set_xscale('log')
    # Pad the x range multiplicatively so the extreme points do not sit
    # on the frame of the log axis.
    ax2.set_xlim(ordered_transistors.min() * .5,
                 ordered_transistors.max() * 1.5)

    ax1.margins(.05, .1)
    ax2.margins(0, .1)

    ax1.grid(True)
    ax2.grid(True, which='both')

    ax1.tick_params(direction='out')
    ax2.tick_params(direction='out', which="both")

    ax1.set_title("Processor Year Released")
    ax2.set_title("Processor Transistors")

    ax1.set_xlabel('Year')
    ax2.set_xlabel('Number of Transistors')

    fig.tight_layout()
    fig.patch.set_facecolor('white')
    # NOTE: saving is intentionally disabled, as in the original;
    # call fig.savefig('problem3.png') here to write the figure to disk.
    plt.show()
def main():
    """Draw a 4x4 scatter-plot matrix of the gene data with trend lines.

    Reads ``genes.dat`` with ``read_comma_separated_file`` and converts
    the first four columns of every row to floats.  Panel ``[i, j]`` of
    the grid plots column ``i`` on the x axis against column ``j`` on the
    y axis and is titled from ``GENE_TITLE``.  Polynomial least-squares
    fits of column 0 against columns 2, 3 and 1 (degree 1, 3 and 5
    respectively) are overlaid in red on the matching top-row panels.

    The figure is saved as ``problem4.png`` and then shown.
    """
    n_genes = 4

    _, data = read_comma_separated_file("genes.dat")

    # Copy the first four columns into a float array, one row per record.
    arr_data = np.zeros([len(data), n_genes])
    for row_index, row in enumerate(data):
        arr_data[row_index] = [float(row[col]) for col in range(n_genes)]

    fig, ax = plt.subplots(n_genes, n_genes, sharex=True, sharey=True)

    # `range` instead of the Python-2-only `xrange` keeps this runnable
    # on both Python 2 and Python 3.
    for x in range(n_genes):
        for y in range(n_genes):
            ax[x, y].plot(arr_data[:, x], arr_data[:, y], '.')
            ax[x, y].margins(.1, .1)
            ax[x, y].set_title(GENE_TITLE[x] + " vs. " + GENE_TITLE[y])

    # Evaluate every fitted curve on the same evenly spaced grid spanning
    # the observed range of the first column.
    first_column = arr_data[:, 0]
    a_x = np.linspace(first_column.min(), first_column.max(), 100)

    # (column, polynomial degree) for each overlay.  Degrees follow the
    # original author's notes on the data:
    #   column 2 - the best fit, plain linear regression
    #   column 3 - the second best correlation, cubic
    #   column 1 - not really correlated at all, order 5
    overlays = ((2, 1), (3, 3), (1, 5))
    for column, degree in overlays:
        fit = np.poly1d(np.polyfit(first_column, arr_data[:, column], degree))
        ax[0, column].plot(a_x, fit(a_x), '-', color='r')

    fig.tight_layout()
    fig.patch.set_facecolor('white')
    fig.suptitle('Gene Correlations', fontsize=20)
    # Leave head-room for the super-title, which tight_layout ignores here.
    plt.subplots_adjust(top=0.85)
    fig.savefig('problem4.png')
    plt.show()