def get_total_coding_rgn(features):
    """Get the total fraction of the genome that is covered by coding regions.
    Not currently used in the EGRIN2 pipeline, but can be used to assess the
    validity of cis-regulatory motifs."""
    # Get coding regions from features file (e.g. Escherichia_coli_K12_features)
    with open(features, 'r') as f:
        skip = 1
        line = f.readline()
        while 'header' not in line:
            line = f.readline()
            skip += 1
    features = pd.read_table(features, skiprows=skip)
    genome_len = int(features.iloc[0].end_pos)
    features = features[features.type != 'SEQ_END']
    start_pos = npstr.replace(features.start_pos.values.astype(str), '<', '').astype(int)
    end_pos = npstr.replace(features.end_pos.values.astype(str), '>', '').astype(int)
    # tbd: use bitarray? (package bitarray https://pypi.python.org/pypi/bitarray)
    hits = np.zeros(genome_len + 1, dtype=bool)
    for i in range(len(start_pos)):
        hits[start_pos[i]:end_pos[i]] = True
    return np.mean(hits)
def get_x(file):
    # read the train-x file
    train_x = np.genfromtxt(file, dtype='str', delimiter=',')
    train_x = npy_dy.replace(train_x, 'M', '0.25')
    train_x = npy_dy.replace(train_x, 'F', '0.5')
    train_x = npy_dy.replace(train_x, 'I', '0.75')
    return train_x.astype(float)
def create_graph(file_path, resistor, y_axis_max):
    fig, ax = plt.subplots()
    file_name = os.path.basename(file_path).split('.')[0]
    data = np.genfromtxt(file_path, delimiter=',', dtype=str, skip_header=7)
    # 4 columns is a dry test, while 5 columns is a wet test
    is_wet_test = data.shape[1] == 5
    if is_wet_test:
        data = data[:, 2:]  # remove first two columns (the time string can't be converted to float)
        data = np_f.replace(data, '"', '')  # the csv files have double quotes for some reason - these need to be removed
        data = data.astype(float)  # convert remaining data to float
        sp_height = data[:, 0]
        ls_volts = data[:, 1]
        ps_volts = data[:, 2]
        sp_height_rel = sp_height[0] - sp_height
        ls_ohms = ls_volts / ((ps_volts - ls_volts) / float(resistor))
        ax.set_xlim([0, sp_height_rel.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(sp_height_rel, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Height (mm)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)
        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 10))
    else:  # dry test
        data = data[:, 1:]
        data = np_f.replace(data, '"', '')  # the csv files have double quotes for some reason - these need to be removed
        data = data.astype(float)  # convert remaining data to float
        ls_time = data[:, 0]
        ls_volts = data[:, 1]
        ps_volts = data[:, 2]
        ls_ohms = ls_volts / ((ps_volts - ls_volts) / float(resistor))
        ax.set_xlim([0, ls_time.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(ls_time, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Time (sec)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)
        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 1))
    ax.tick_params(labelsize=5)
    ax.set_title(file_name, fontsize=7)
    ax.grid(linewidth=0.1)
    start, end = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(0, end, 50))
    #fig.savefig("test.png")
    #plt.show()
    make_pdf(file_path)
def mutasi(individu):
    for i in range(2):
        random_index = randint(0, 6)
        ori = individu[random_index]
        mutan = npc.replace(ori, "0", "2")
        mutan = npc.replace(mutan, "1", "0")
        mutan = npc.replace(mutan, "2", "1")
        individu[random_index] = mutan
    return individu
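The three chained replace calls above flip a bit string by using "2" as a sentinel: every "0" is first rewritten to "2" so it is not clobbered when "1" becomes "0", and the sentinel is then turned into "1". A minimal sketch of the same pattern on a plain NumPy string array (the array contents below are made up):

import numpy as np
import numpy.char as npc  # numpy.core.defchararray is also exposed as numpy.char

bits = np.array(["0110", "1001"])
# 0 -> 2 (sentinel), 1 -> 0, 2 -> 1
flipped = npc.replace(npc.replace(npc.replace(bits, "0", "2"), "1", "0"), "2", "1")
print(flipped)  # ['1001' '0110']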
def img_dirs(resource, dir_) -> np.ndarray:
    dumped = join(DUMP_DIR, "img_dirs_{}.npy".format(resource))
    if exists(dumped):
        return np.load(dumped)
    if resource == "jpg":
        pathname = dir_ + "*/*/*.jpg"
        if sys.platform == "win32":
            pathname = pathname.replace("\\", "/")
        dirs = glob(pathname=pathname)
        dirs = np.array(dirs)
        # dirs.dump(dumped)
    elif resource == "csv":
        pathname = dir_ + "*/*images*.csv"
        if sys.platform == "win32":
            pathname = pathname.replace("\\", "/")
        dirs = glob(pathname=pathname)
        dfs = np.array(['image_name', 'image_url'])
        for c in dirs:
            dfs = np.append(dfs, genfromtxt(c, dtype=str, delimiter=",")[1:])
        dirs = dfs.reshape((-1, 2))[1:]
        dirs[:, 0] = char.replace(dirs[:, 0], ".jpg", "")
        # dirs.dump(dumped)
    else:
        raise FileNotFoundError("can't find {} images".format(resource))
    return dirs
def read_pandas_txt(data_dir, train=True):
    data_df = pd.read_csv(data_dir, header=None)  # os.path.join(os.getcwd(),
    temp_ = np.asarray([str(x[0]).split(" ") for x in data_df.values])
    bin_temp_ = np_f.replace(temp_, ['semantic'], ['binary'])
    ins_temp_ = np_f.replace(bin_temp_, ['binary'], ['instance'])
    print(bin_temp_, ins_temp_)
    print("TOTAL NUMBERS:", temp_.shape)
    if train:
        X = temp_[:, 0]
        y = bin_temp_[:, 1]
        y_ins = ins_temp_[:, 1]
        return np.array(X), np.array(y), np.array(y_ins)  # ,coords
    else:
        X = temp_[:, 0]
        return np.array(X)
def load_source_rows(tab, names, key='assoc'):
    """Load the rows from a table that match a source name.

    Parameters
    ----------
    tab : `astropy.table.Table`
        Table that will be searched.

    names : list
        List of source identifiers.

    key : str
        Name of the table column that will be searched for a source
        matching key.

    Returns
    -------
    outtab : `astropy.table.Table`
        Table containing the subset of rows with matching source
        identifiers.
    """
    names = [name.lower().replace(' ', '') for name in names]
    col = tab[[key]].copy()
    col[key] = defchararray.replace(defchararray.lower(col[key]), ' ', '')
    mask = create_mask(col, {key: names})
    return tab[mask]
def stats(data):
    dict_of_lists = defaultdict(list)
    data = np.array(data)
    data_tran = data.T
    for i in data_tran:
        dict_of_lists[i[0]].append(i[1:])
        dict_of_lists[i[0]] = np_f.replace(dict_of_lists[i[0]], 'NA', '0')
    for i in dict_of_lists.keys():
        dict_of_lists[i] = dict_of_lists[i].astype(float)
        dict_of_lists[i] = dict_of_lists[i][0]
    list_of_dicts = dict_of_lists
    max_list = []
    min_list = []
    mean_list = []
    median_list = []
    var = []
    for i in list_of_dicts.keys():
        max_list.append(max(list_of_dicts[i]))
        min_list.append(min(list_of_dicts[i]))
        mean_list.append(np.mean(list_of_dicts[i]))
        median_list.append(np.median(list_of_dicts[i]))
        var.append(i)
    return max_list, min_list, mean_list, median_list, var
def plot_pie_difficulty(filename, data):
    # Pie chart showing the distribution of reported game difficulty
    data = np.array(COLUMN_DIFFICULTY_ITENS)[data - 1]
    labels, counts_elements = np.unique(data, return_counts=True)
    slices = np.round(counts_elements / len(data) * 100, 1)
    # Convert the labels to strings and escape the $ character (latex)
    labels_str = labels.astype(str)
    labels_str = np_f.replace(labels_str, '$', '\\$')
    patches, texts, autotexts = plt.pie(slices, labels=labels_str, autopct='%.1f%%',
                                        startangle=90, counterclock=False)
    plt.setp(autotexts, size=16, weight="bold")
    plt.axis('equal')
    for t in texts:
        t.set_size('large')
    for t in autotexts:
        t.set_size('large')
    plt.savefig(filename, bbox_inches='tight', dpi=400)
    plt.show()
    plt.close()
    return slices, labels
def remove_cols(data, skip_cols):
    conv = []
    colnr = 0
    for col in data:
        if colnr % 200 == 0:
            print('processing column {0:d}...'.format(colnr))
            gc.collect()
        if colnr not in skip_cols:
            col = strip(col, '"')
            col = replace(col, '', '0')
            col = replace(col, 'NA', '0')
            col = replace(col, 'false', '0')
            col = replace(col, 'true', '1')
            conv.append(col.astype(int16))
        colnr += 1
    gc.collect()
    return array(conv)
def plot_pie(filename, data1, data2):
    fig, axes = plt.subplots(nrows=1, ncols=2)

    # Pie chart (left) showing how many players are interested in playing
    length = len(data1)
    py = round(np.count_nonzero(data1 == COLUMN_PAY_ITENS[0]) / length * 100, 0)
    pn = round(np.count_nonzero(data1 == COLUMN_PAY_ITENS[1]) / length * 100, 0)
    patches, texts, autotexts = axes[0].pie([py, pn], labels=COLUMN_PLAY_ITENS,
                                            autopct='%.1f%%', startangle=90,
                                            counterclock=False)
    axes[0].set_title(COLUMN_PLAY, fontsize=8)
    axes[0].axis('equal')
    plt.setp(autotexts, size=12, weight="bold")
    for t in texts:
        t.set_size('x-small')
    for t in autotexts:
        t.set_size('x-small')

    # Pie chart (right) showing how much players would pay for the game
    labels, counts_elements = np.unique(data2, return_counts=True)
    slices = np.round(counts_elements / length * 100, 1)
    # Convert the labels to strings and escape the $ character (latex)
    labels_str = labels.astype(str)
    labels_str = np_f.replace(labels_str, '$', '\\$')
    patches, texts, autotexts = axes[1].pie(slices, labels=labels_str, autopct='%.1f%%',
                                            startangle=90, counterclock=False)
    axes[1].set_title(COLUMN_PAY, fontsize=8)
    axes[1].axis('equal')
    plt.setp(autotexts, size=8, weight="bold")
    for t in texts:
        t.set_size('x-small')
    for t in autotexts:
        t.set_size('x-small')

    plt.axis('equal')
    plt.subplots_adjust(wspace=0.75)
    plt.savefig(filename + '_pie.png', bbox_inches='tight', dpi=400)
    plt.show()
    plt.close()
    return py, pn, slices, labels
def get_filenames(self, file_ext='.xml'):
    """
    Helper function which gets the filename identifiers (excluding the file
    extension) from a directory

    Args:
        path_to_dataset (string): Path to directory with the files
        file_ext (string): File extension to be spliced out of filename

    Returns:
        ndarray with the file identifiers
    """
    files = os.listdir(self.path_to_annotations)
    arr = np.array(files)
    result = replace(arr, file_ext, '')
    return result
def to_ints_only(data):
    conv = []
    failed = []
    colnr = 0
    for col in data:
        colnr += 1
        if colnr % 100 == 0:
            print('converting column {0:d}...'.format(colnr))
        col = strip(col, '"')
        col = replace(col, '', '0')
        col = replace(col, 'NA', '0')
        col = replace(col, 'false', '0')
        col = replace(col, 'true', '1')
        try:
            irow = col.astype(int16)
        except ValueError as err:
            skiprows.append(colnr - 1)
            failed.append(str(err).split(':', 1)[1])
        except OverflowError as err:
            print(str(err))
            skiprows.append(colnr - 1)
        # except OverflowError as err:
        #     print(str(err))
        #     print('will look for overflow error value...')
        #     for v in col:
        #         try:
        #             v.astype(int)
        #         except:
        #             print('overflow:', v)
        else:
            conv.append(irow)
        del col
        gc.collect()  # free memory
    print('failed for (excluding overflows): "{0:s}"'.format('", "'.join(failed)))
    print('{0:d} columns removed'.format(len(failed)))
    return array(conv)
def create_graph(file_path, wet_test, y_axis_max, data_start_index):
    fig, ax = plt.subplots()
    file_name = os.path.basename(file_path).split('.')[0]
    data = np.genfromtxt(file_path, delimiter=',', dtype=str, skip_header=data_start_index)
    data = np_f.replace(data, '"', '')  # the csv files have double quotes for some reason - these need to be removed
    data = data.astype(float)  # convert remaining data to float
    if wet_test == 'True':
        sys_height = data[:, 1]
        ls_ohms = data[:, 4]
        ax.set_xlim([0, sys_height.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(sys_height, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Height (mm)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)
        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 10))
    else:  # dry test
        ls_time = data[:, 0]
        ls_ohms = data[:, 4]
        ax.set_xlim([0, ls_time.max()])
        ax.set_ylim([0, float(y_axis_max)])
        ax.plot(ls_time, ls_ohms, linewidth=0.1)
        ax.set_xlabel('Time (sec)', fontsize=7)
        ax.set_ylabel('LS Resistance (ohms)', fontsize=7)
        start, end = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(0, end, 1))
    ax.tick_params(labelsize=5)
    ax.set_title(file_name, fontsize=7)
    ax.grid(linewidth=0.1)
    start, end = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(0, end, 50))
    #fig.savefig("test.png")
    #plt.show()
    make_pdf(file_path)
def class_scoresmat2csv(matfile, bin_lid):
    """Convert a class score .mat file into a CSV representation"""
    try:
        import pandas as pd
    except ImportError:
        return '\n'.join(slow_class_scoresmat2csv(matfile, bin_lid))
    scores = loadmat(matfile, squeeze_me=True)
    prefix = bin_lid + '_'
    cols = scores['class2useTB'][:-1]  # exclude last class: 'unclassified'
    df = pd.DataFrame(scores['TBscores'], columns=cols)
    p = scores['roinum'].astype(str)
    p = replace(rjust(p, 6, '0'), '0', prefix, 1)
    pid = pd.Series(p)
    df.insert(0, 'pid', pid)
    s = StringIO()
    df.to_csv(s, index=False, float_format='%f')
    csv_out = s.getvalue().replace('0.000000', '0.0')
    return csv_out
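The pid column above is built by zero-padding the ROI numbers to six characters and then swapping only the first '0' for the bin prefix (note the count argument of 1 in the replace call). A minimal standalone illustration of that count-limited replace; the ROI numbers and prefix below are made up:

import numpy as np
from numpy.char import rjust, replace

roinum = np.array(["7", "42"])
prefix = "D20230101T000000_"  # hypothetical bin_lid prefix
pids = replace(rjust(roinum, 6, '0'), '0', prefix, 1)  # only the first '0' is replaced
print(pids)  # ['D20230101T000000_00007' 'D20230101T000000_00042']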
def find_rows_by_string(tab, names, colnames=['assoc']):
    """Find the rows in a table ``tab`` that match at least one of the
    strings in ``names``.  This method ignores whitespace and case when
    matching strings.

    Parameters
    ----------
    tab : `astropy.table.Table`
       Table that will be searched.

    names : list
       List of strings.

    colnames : list
       Names of the table columns that will be searched for matching strings.

    Returns
    -------
    mask : `~numpy.ndarray`
       Boolean mask for rows with matching strings.
    """
    mask = np.empty(len(tab), dtype=bool)
    mask.fill(False)
    names = [name.lower().replace(' ', '') for name in names]

    for colname in colnames:
        if colname not in tab.columns:
            continue
        col = tab[[colname]].copy()
        col[colname] = defchararray.replace(defchararray.lower(col[colname]).astype(str), ' ', '')
        for name in names:
            mask |= col[colname] == name
    return mask
def extract(address):
    def gen_divider(d):
        # record the start (A) and end (B) row indices of each chain segment
        A = []
        B = []
        a = d[:, 0][0]
        j = 0
        for i in range(1, len(d[:, 0])):
            if a != d[:, 0][i]:
                B.append(i - 1)
                A.append(j)
                a = d[:, 0][i]
                j = i
        A.append(j)
        B.append(len(d[:, 0]) - 1)
        V = []
        V.append(A)
        V.append(B)
        return V

    prt_container = []
    with open(address) as pdbfile:
        for line in pdbfile:
            if line[:6] == 'SEQRES':
                splitted_line = np.array([str(line[11]), str(line[19:22]), str(line[23:26]),
                                          str(line[27:30]), str(line[31:34]), str(line[35:38]),
                                          str(line[39:42]), str(line[43:46]), str(line[47:50]),
                                          str(line[51:54])])
                prt_container.append(splitted_line)
    d = np.array(prt_container)
    convert = {'A': 'ALA', 'C': 'CYS', 'D': 'ASP', 'E': 'GLU',
               'F': 'PHE', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
               'K': 'LYS', 'L': 'LEU', 'N': 'ASN', 'M': 'MET',
               'P': 'PRO', 'Q': 'GLN', 'R': 'ARG', 'S': 'SER',
               'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR'}
    dummy = list(dict.fromkeys(convert))
    for i in dummy:  # convert the three-letter residue codes to one-letter codes
        d = np_f.replace(d, convert[i], i)
    z = []
    for j, i in zip(gen_divider(d)[0], gen_divider(d)[1]):
        x = np.concatenate(d[j:i + 1, 1:], axis=0)
        z.append(''.join(list(x)))
    return z
def roundup(x):
    return int(math.ceil(x / 10.0)) * 10


if __name__ == "__main__":
    file_path = r'C:\Data\sweep test data (single cycle).csv'
    data = np.genfromtxt(
        file_path,
        delimiter=',',
        dtype=str,
    )
    data = np_f.replace(data, '"', '')  # the csv files have double quotes for some reason - these need to be removed
    data = data.astype(float)  # convert remaining data to float
    create_graph(
        data=data,
        data_path=file_path,
        title_1='CA2020-3549, MAPPS, Post 6.8 Mechanical Strength of Electrical Connector',
        title_2='2921-1, Wet Test',
        tol_path=r'C:\Users\gtetil\Documents\Projects\Reliability-Sweeper\Source\Files\Tolerances\MLS Tolerance (MS, dry).csv',
        tol_band_color=0,
        graph_type=1,
        height_min=-15,
        height_max=235,
def create_graph(data, data_path, title_1, title_2, tol_path, tol_band_color,
                 graph_type, height_min, height_max, height_interval,
                 resistance_max, resistance_interval, time_max, time_interval,
                 graph_output_file, auto_open):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    graph_type = Graph_Type(graph_type)  # convert graph type to an enum
    graph_output_file = Graph_Output_File(graph_output_file)  # convert graph output file to an enum
    tol_band_color = Color(tol_band_color)  # convert tol band color to an enum

    # LS data
    data = np.array(data)
    height_array = data[:, 1]
    min_height = np.amin(height_array)
    max_height = np.amax(height_array)
    mid_height = (max_height - min_height) / 2
    low_to_high_sweep = True if height_array[0] < mid_height else False
    if low_to_high_sweep:  # if sweep starts low and goes high, use this algorithm
        first_transition_index = int(np.argwhere(height_array > mid_height)[0])
        second_transition_index = int(np.argwhere(height_array[first_transition_index:] < mid_height - 10)[0]) + first_transition_index
        end_height_index = np.argmin(height_array[second_transition_index:(first_transition_index + second_transition_index)]) + second_transition_index
    else:  # if sweep starts high and goes low, use this algorithm
        first_transition_index = int(np.argwhere(height_array < mid_height)[0])
        second_transition_index = int(np.argwhere(height_array[first_transition_index:] > mid_height + 10)[0]) + first_transition_index
        end_height_index = np.argmax(height_array[second_transition_index:(first_transition_index + second_transition_index)]) + second_transition_index
    if graph_type in [Graph_Type.Both, Graph_Type.H_vs_R]:
        data = data[:end_height_index, :]
    time = data[:, 0]

    # separate data into e-to-f and f-to-e
    sys_height = data[:, 1]
    ls_ohms = data[:, 4]
    if low_to_high_sweep:
        min_indice = np.argmax(sys_height)
        empty_to_full = ls_ohms[:min_indice]
        full_to_empty = ls_ohms[min_indice:]
        empty_to_full_sys = sys_height[:min_indice]
        full_to_empty_sys = sys_height[min_indice:]
    else:
        min_indice = np.argmin(sys_height)
        empty_to_full = ls_ohms[min_indice:]
        full_to_empty = ls_ohms[:min_indice]
        empty_to_full_sys = sys_height[min_indice:]
        full_to_empty_sys = sys_height[:min_indice]

    lns = ax.plot()

    # height vs resistance
    if graph_type in [Graph_Type.Both, Graph_Type.H_vs_R]:
        ax.set_xlim(int(height_min), int(height_max))
        ax.set_ylim(0, float(resistance_max))
        e_to_f_plot = ax.plot(empty_to_full_sys, empty_to_full, linewidth=0.5, label='R vs. H - fill', color='blue')
        f_to_e_plot = ax.plot(full_to_empty_sys, full_to_empty, linewidth=0.5, label='R vs. H - drain', color='magenta')
        ax.set_xlabel('Height (mm)', fontsize=7)
        ax.set_ylabel(r'Resistance ($\Omega$)', fontsize=7)
        ax.xaxis.set_ticks(np.append(np.arange(height_min, height_max, height_interval), height_max))
        ax.yaxis.set_ticks(np.append(np.arange(0, resistance_max, resistance_interval), resistance_max))
        lns = e_to_f_plot + f_to_e_plot
    if graph_type == Graph_Type.H_vs_R:
        fig.suptitle(title_1, fontsize=10, fontweight='bold')
        ax.set_title(title_2, fontsize=10, y=1.03)

    # time vs resistance (with height vs resistance)
    if graph_type == Graph_Type.Both:
        ax2 = ax.twiny()
        ax2.set_xlim(0, time_max)
        r_vs_t = ax2.plot(time, ls_ohms, linewidth=0.5, label='Resistance vs. Time', color='orange')
        ax2.set_xlabel('Time (sec)', fontsize=7)
        ax2.xaxis.set_ticks(np.append(np.arange(0, time_max, time_interval), time_max))
        lns = lns + r_vs_t
        ax2.tick_params(labelsize=5)
        plt.subplots_adjust(top=0.835)
        fig.suptitle(title_1, fontsize=10, fontweight='bold')
        ax.set_title(title_2, fontsize=10, y=1.095)  # this raises the title to fit the top x-axis

    # time vs resistance (only)
    if graph_type == Graph_Type.T_vs_R:
        ax.set_xlim(0, time_max)
        ax.set_ylim(0, resistance_max)
        r_vs_t = ax.plot(time, ls_ohms, linewidth=0.5, label='Resistance vs. Time', color='orange')
        ax.set_xlabel('Time (sec)', fontsize=7)
        ax.set_ylabel(r'Resistance ($\Omega$)', fontsize=7)
        ax.xaxis.set_ticks(np.append(np.arange(0, time_max, time_interval), time_max))
        ax.yaxis.set_ticks(np.append(np.arange(0, resistance_max, resistance_interval), resistance_max))
        lns = r_vs_t
        fig.suptitle(title_1, fontsize=10, fontweight='bold')
        ax.set_title(title_2, fontsize=10, y=1.03)

    # tolerance bands
    if tol_path != '' and graph_type != Graph_Type.T_vs_R:
        tol_data = np.genfromtxt(tol_path, delimiter=',', dtype=str, skip_header=1)
        tol_data = np_f.replace(tol_data, '"', '')  # the csv files have double quotes for some reason - these need to be removed
        tol_data = tol_data.astype(float)  # convert remaining data to float
        low_tol_plot = ax.plot(tol_data[:, 0], tol_data[:, 1], linewidth=0.5, color=tol_band_color.name, label='Tolerance', linestyle=':')
        up_tol_plot = ax.plot(tol_data[:, 2], tol_data[:, 3], linewidth=0.5, color=tol_band_color.name, linestyle=':')
        lns = lns + low_tol_plot

    # create legend
    labs = [l.get_label() for l in lns]
    ax.legend(lns, labs, loc=3, fontsize='x-small')

    ax.tick_params(labelsize=5)
    ax.grid(linewidth=0.1)

    # output graph to file
    graph_path = data_path.replace('.csv', '.' + graph_output_file.name)
    if graph_output_file == Graph_Output_File.png:
        fig.savefig(graph_path, dpi=1000)
    elif graph_output_file == Graph_Output_File.pdf:
        make_pdf(graph_path)
    if auto_open:
        subprocess.Popen([graph_path], shell=True)
    #plt.show()
    plt.close(fig)  # must close figure, or there will be a memory error when running batch_graph_creator.py
# The Iris dataset stores the species column as strings, so it has to be converted to numbers.
# Load the features into X
X = np.genfromtxt('iris.csv', delimiter=',', dtype='float', usecols=[0, 1, 2, 3], skip_header=1)
print(X.shape)

# Load the species column into y
y = np.genfromtxt('iris.csv', delimiter=',', dtype='str', usecols=4, skip_header=1)

# replace the strings with numbers
# np.unique() returns the unique string values in a numpy array
categories = np.unique(y)
print(categories)
for i in range(categories.size):
    # np_f.replace() substitutes the string values
    y = np_f.replace(y, categories[i], str(i))

# cast to float
y = y.astype('float')
print(y)
def handle_missing_data(data):
    print("Replacing irrelevant values")
    data = np_f.replace(data, 'NA', '0')
    data = data.astype(float)
    return data
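handle_missing_data leans on a pattern that recurs throughout these examples: rewrite placeholder strings first, then cast the whole array to a numeric dtype. A minimal sketch of that pattern with numpy.char (the sample values below are made up):

import numpy as np

raw = np.array([["1.5", "NA"], ["NA", "2.0"]])
clean = np.char.replace(raw, "NA", "0").astype(float)
print(clean)  # [[1.5 0. ]
              #  [0.  2. ]]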
    img = imread('../data/converted/' + fname + '.jpeg')
    hsv = color.rgb2hsv(img)
    hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2])
    img = color.hsv2rgb(hsv)
    imsave(folder + label + '/' + fname + '.png', img)


full_filenames = np.genfromtxt('../data/train_filenames.txt', dtype=str)

# Read the labels file
full_labels = np.genfromtxt('../data/trainLabels.csv', skip_header=1, dtype=str, delimiter=',')

# Keep only labels of data that can be used in training
full_samples = replace(full_filenames, ".jpeg", "")
full_mask = np.isin(full_labels[:, 0], full_samples)
trainable_labels = np.copy(full_labels[full_mask, :])
test_labels = np.copy(full_labels[np.invert(full_mask), :])

# Downsample the zero grade, keeping only the first 5000
# Randomize order
np.random.seed(1234)
np.random.shuffle(trainable_labels)

# Remove a part for validation
n_validation = 2400
validation_labels = np.copy(trainable_labels[:n_validation, :])
trainable_labels = np.copy(trainable_labels[n_validation:, :])

# Arrange by a stable sort (mergesort)
colors = ['Blue', 'Red']
for arm_color in colors:
    for obj_target in dz.observation_dict['objects'] + dz.observation_dict['Standard_stars']:

        # Get the arc
        arc_run = map(float, dz.observation_dict[obj_target + '_arc'])  # This must be changed to a df
        arc_idx = (dz.reducDf.reduc_tag == 'arc_trim') & (dz.reducDf.RUN.isin(arc_run)) & \
                  (dz.reducDf.ISIARM == '{color} arm'.format(color=arm_color)) & (dz.reducDf.valid_file)
        arc_filename = dz.reducDf.loc[arc_idx, 'file_name'].values[0]
        arc_code = arc_filename[0:arc_filename.rfind('.')]

        # Get the object
        index_object = (dz.reducDf.reduc_tag == 'trim_image') & (dz.reducDf.frame_tag == obj_target) & \
                       (dz.reducDf.ISIARM == '{color} arm'.format(color=arm_color)) & (dz.target_validity_check())
        Files_Folder = dz.reducDf.loc[index_object, 'file_location'].values
        Files_Name = dz.reducDf.loc[index_object, 'file_name'].values
        output_names = np_f.replace(Files_Name.astype(str), '.fits', '_w.fits')

        for j in range(len(Files_Name)):
            dz.task_attributes['run folder'] = Files_Folder[j]
            dz.task_attributes['color'] = arm_color
            dz.task_attributes['input'] = Files_Folder[j] + Files_Name[j]
            dz.task_attributes['output'] = Files_Folder[j] + output_names[j]
            dz.task_attributes['fitnames'] = arc_code

            # Moving the data base to the obj/standard folder
            if not os.path.exists(dz.task_attributes['run folder'] + 'database/'):
                os.makedirs(dz.task_attributes['run folder'] + 'database/')

            input_arc_code = '{in_folder}database/fc{arc_code}'.format(in_folder=dz.reducFolders['arcs'], arc_code=arc_code)
            output_arc_code = '{out_folder}database/fc{arc_code}'.format(out_folder=dz.task_attributes['run folder'], arc_code=arc_code)
            copyfile(input_arc_code, output_arc_code)
           astaltObj, fib4Obj, annObj]

toronto.df = pd.read_excel('C:/Users/Soren/Desktop/Thesis/Data Analysis/toronto_dataset.xlsx', parse_cols="A:BK")
#toronto.df = toronto.df.drop_duplicates(subset='MRN', keep='first')
#toronto.df = toronto.df.loc[(toronto.df['TotalMissingnessWithSodiumGGTPlatelets'] < 0) & (toronto.df['DecompensatedCirrhosis'] == 0)]  # & (toronto.df['Platelets'] > 0) & )
toronto.df = toronto.df.loc[(toronto.df['DecompensatedCirrhosis'] == 0) & (toronto.df['Albumin'] > 0) & (toronto.df['ALP'] > 0) & (toronto.df['ALT'] > 0)
                            & (toronto.df['AST'] > 0) & (toronto.df['Bilirubin'] > 0) & (toronto.df['Creatinine'] > 0) & (toronto.df['INR'] > 0)
                            & (toronto.df['Platelets'] > 0) & (toronto.df['BMI'] > 0)]
toronto.df = toronto.df.sample(frac=1).reset_index(drop=True)

toronto.X = toronto.df.iloc[:, 0:49].values
toronto.Y = toronto.df.iloc[:, 49].values
toronto.Y = nd.replace(nd.replace(nd.replace(toronto.Y.astype(str), 'F 4', '4'), 'F 1', '0'), 'F 0', '0').astype(int)
toronto.MRNs = toronto.df.iloc[:, 51]
toronto.entryDates = toronto.df.iloc[:, 52]

toronto.split = 'groupKFold'  # KFold # groupKFold
dft = toronto.df

from sklearn.model_selection import GroupKFold
from sklearn.model_selection import KFold

kf = GroupKFold(n_splits=10)
normalKF = KFold(n_splits=10, shuffle=True, random_state=0)
kf.get_n_splits(toronto.X, toronto.Y, toronto.MRNs.astype(int))

svmObj.params = {
    'method': 'label',
import numpy as np
import matplotlib.pyplot as plt
import numpy.core.defchararray as defCharArr

x = np.arange(-1.0, 1.01, 0.01)

testData = np.genfromtxt("test.txt", dtype='str')
testData = defCharArr.replace(testData, ',', '.')
testData = testData.astype(float)
print("Input values:\n", testData)
print("\n")

plt.figure(1)
plt.plot(x, testData, 'b.')

sigma = 0.5
eta = 0.1
centres = [-0.8, -0.6, -0.4, -0.2, 0.0, 0.2, 0.4, 0.6, 0.8]
weights = [0.2, 0.5, 0.8, 0.1, 0.88, 0.5, 0.3, 0.22, -0.2]


def net(testVal, centres):
    return np.exp((-1 / (2 * sigma)) * ((centres - testVal) ** 2))


numberOfEvals = range(2000)
testDataLength = len(testData)
numberOfValues = np.arange(0, testDataLength, 1)

for j in numberOfEvals:
    for i in numberOfValues:
def get_generators(n_total, batch_size, image_shape=None, type='array', zeros_left=5000):
    '''
    Construct generators for training and validation data
    Zero grade images are downsampled
    :param n_total: number of total images to use (training plus validation)
    :param batch_size: batch size used in training
    :param image_shape: image size used in training
    :param zeros_left: how many images of grade zero should be left in the pool
                       use a negative value to keep all the zeros
    :return:
        train_gen: generator of training data
        test_gen: generator of validation data
    '''
    # Set the number of training samples
    n_train = int(np.ceil(n_total * 0.8))
    n_test = int(np.floor(n_total * 0.2))

    # Read filenames from a text file listing all the images
    full_filenames = np.genfromtxt('../data/train_filenames.txt', dtype=str)

    # Read the labels file
    full_labels = np.genfromtxt('../data/trainLabels.csv', skip_header=1, dtype=str, delimiter=',')

    # Keep only labels of data that can be used in training
    full_samples = replace(full_filenames, ".jpeg", "")
    full_mask = np.isin(full_labels[:, 0], full_samples)
    trainable_labels = np.copy(full_labels[full_mask, :])

    # Downsample the zero grade, keeping only the first 5000
    # Randomize order
    np.random.seed(1234)
    np.random.shuffle(trainable_labels)

    # Arrange by a stable sort (mergesort)
    trainable_labels = np.copy(trainable_labels[trainable_labels[:, 1].argsort(kind='mergesort')])

    # Remove extra zeros
    if zeros_left > 0:
        _, counts = np.unique(trainable_labels[:, 1], return_counts=True)
        n_zeros = counts[0]
        downsampled_labels = np.copy(trainable_labels[(n_zeros - zeros_left):, :])
    else:
        downsampled_labels = np.copy(trainable_labels)

    # Randomize and choose training data
    np.random.shuffle(downsampled_labels)
    train_labels = downsampled_labels[:n_train, :]
    #test_labels = downsampled_labels[n_train:(n_train + n_test)]

    # Exclude training samples from the original data and choose test data among them
    np.random.shuffle(trainable_labels)
    exclusion = np.isin(trainable_labels[:, 0], train_labels[:, 0], invert=True)
    valid_labels = np.copy(trainable_labels[exclusion, :])
    test_labels = np.copy(valid_labels[:n_test, :])

    # Print the counts of each class in test and train data
    _, train_counts = np.unique(train_labels[:, 1], return_counts=True)
    print("\nTrain distribution:")
    print(train_counts / np.sum(train_counts))
    _, test_counts = np.unique(test_labels[:, 1], return_counts=True)
    print("\nTest distribution:")
    print(test_counts / np.sum(test_counts))
    print("\n")

    if type == 'array':
        # Add .npy file ending
        train_filenames = add(train_labels[:, 0], np.full(shape=n_train, fill_value='.npy'))
        test_filenames = add(test_labels[:, 0], np.full(shape=n_test, fill_value='.npy'))

        # Add path of the data folder to the files
        train_filepaths = add(np.full(shape=train_filenames.shape, fill_value='../data/arrays/'), train_filenames)
        test_filepaths = add(np.full(shape=test_filenames.shape, fill_value='../data/arrays/'), test_filenames)

        # Create an instance of the image generator
        train_gen = ArrayGenerator(train_filepaths, train_labels[:, 1], batch_size)
        test_gen = ArrayGenerator(test_filepaths, test_labels[:, 1], batch_size)

    elif type == 'image':
        if image_shape is None:
            raise ValueError

        # Add .jpeg file ending
        train_filenames = add(train_labels[:, 0], np.full(shape=n_train, fill_value='.jpeg'))
        test_filenames = add(test_labels[:, 0], np.full(shape=n_test, fill_value='.jpeg'))

        # Add path of the data folder to the files
        train_filepaths = add(np.full(shape=train_filenames.shape, fill_value='../data/train/'), train_filenames)
        test_filepaths = add(np.full(shape=test_filenames.shape, fill_value='../data/train/'), test_filenames)

        # Create an instance of the image generator
        train_gen = ImageGenerator(train_filepaths, train_labels[:, 1], batch_size, image_shape)
        test_gen = ImageGenerator(test_filepaths, test_labels[:, 1], batch_size, image_shape)

    return train_gen, test_gen
def Normalized_Data():
    from dtw import dtw
    import numpy as np
    import matplotlib.pyplot as plt
    import csv
    import itertools
    import numpy.core.defchararray as np_f

    x_normalized = []
    y_normalized = []
    Data1 = []
    Data2 = []
    Data3 = []
    List = []
    List2 = []
    List3 = []
    DTW_Sat = []
    DTW_Week = []
    FID = []
    Longitude = []
    Latitude = []
    count1 = 0
    count2 = 0

    Regular_Sat_Location = r'C:\Users\patri\Desktop\Thesis_Final\Data_From_Extraction\M05_D07_HL_Edit_Regular_Sat.csv'
    Regular_Weekday_Location = r'C:\Users\patri\Desktop\Thesis_Final\Data_From_Extraction\M05_D25_HL_Edit_Regular_Weekday.csv'
    Memorial_Day_Location = r'C:\Users\patri\Desktop\Thesis_Final\Data_From_Extraction\M05_D28_HL_Edit_Memorial_Day_Sat.csv'

    with open(Regular_Sat_Location) as file:
        reader = csv.reader(file, delimiter=',')
        for column in reader:
            Data1.append(column[2])
            FID.append(column[0])
            Longitude.append(column[5])
            Latitude.append(column[4])
    FID.remove(FID[0])
    Longitude.remove(Longitude[0])
    Latitude.remove(Latitude[0])
    Data1.remove(Data1[0])

    with open(Regular_Weekday_Location) as file:
        reader = csv.reader(file, delimiter=',')
        for column in reader:
            Data2.append(column[2])
    Data2.remove(Data2[0])

    with open(Memorial_Day_Location) as file:
        reader = csv.reader(file, delimiter=',')
        for column in reader:
            Data3.append(column[2])
    Data3.remove(Data3[0])

    for day in Data1:
        result = [i.strip() for i in day.split(',')]
        List.append(result)
    Reg_Sat_List = list(itertools.chain.from_iterable(List))

    for day2 in Data2:
        result2 = [i2.strip() for i2 in day2.split(',')]
        List2.append(result2)
    Reg_Weekday_List = list(itertools.chain.from_iterable(List2))

    for day3 in Data3:
        result3 = [i3.strip() for i3 in day3.split(',')]
        List3.append(result3)
    Memorial_Day_List = list(itertools.chain.from_iterable(List3))

    x_with_string = np.array(Reg_Sat_List).reshape(-1, 1)
    findx = np_f.replace(x_with_string, 'NA', '0')
    Reg_Sat_Input = np.array(findx, dtype=int).reshape(-1, 1)
    Reg_Sat_Input_split = np.split(Reg_Sat_Input, 24)

    y_with_string = np.array(Reg_Weekday_List).reshape(-1, 1)
    findy = np_f.replace(y_with_string, 'NA', '0')
    Reg_Weekday_Input = np.array(findy, dtype=int).reshape(-1, 1)
    Reg_Weekday_Input_split = np.split(Reg_Weekday_Input, 24)

    x_with_string = np.array(Memorial_Day_List).reshape(-1, 1)
    findx = np_f.replace(x_with_string, 'NA', '0')
    Memorial_Day_List_Input = np.array(findx, dtype=int).reshape(-1, 1)
    Memorial_Day_List_Input_split = np.split(Memorial_Day_List_Input, 24)

    for x_hour_frequency in Memorial_Day_List_Input_split:
        for y_hour_frequency in Reg_Sat_Input_split:
            max = np.amax(x_hour_frequency)
            x_normalized_weeks = np.true_divide(x_hour_frequency, max)
            max2 = np.amax(y_hour_frequency)
            y_normalized_weeks = np.true_divide(y_hour_frequency, max2)
            l2_norm = lambda x_normalized_weeks, y_normalized_weeks: (x_normalized_weeks - y_normalized_weeks) ** 2
            dist = dtw(x_normalized_weeks, y_normalized_weeks, dist=l2_norm)
            DTW_Sat.append(dist[0])
            count1 += 1
            count2 += 1

    for x_hour_frequency in Memorial_Day_List_Input_split:
        for y_hour_frequency in Reg_Weekday_Input_split:
            max = np.amax(x_hour_frequency)
            x_normalized_weeks = np.true_divide(x_hour_frequency, max)
            max2 = np.amax(y_hour_frequency)
            y_normalized_weeks = np.true_divide(y_hour_frequency, max2)
            l2_norm = lambda x_normalized_weeks, y_normalized_weeks: (x_normalized_weeks - y_normalized_weeks) ** 2
            dist = dtw(x_normalized_weeks, y_normalized_weeks, dist=l2_norm)
            DTW_Week.append(dist[0])
            count1 += 1
            count2 += 1

    """
    #l2_norm = lambda Test_List_x, Test_List_y: (Test_List_x - Test_List_y) ** 2
    #dist, cost_matrix, acc_cost_matrix, path = dtw(Test_List_x, Test_List_y, dist=l2_norm)
    #print(dist)
    #For the dynamic time warping distance, the smaller the distance, the more
    #similar they are. The larger the distance, the less similar they are.
    plt.imshow(acc_cost_matrix.T, origin='lower', cmap='gray', interpolation='nearest')
    plt.plot(path[0], path[1], 'w')
    plt.show()
    """

    Big_File_Sat = zip(FID, Latitude, Longitude, DTW_Sat)
    Big_File_Week = zip(FID, Latitude, Longitude, DTW_Week)

    Sat_CSV = r"C:\Users\patri\Desktop\Thesis_Final\Results\Sat_CSV.csv"
    Weekday_CSV = r"C:\Users\patri\Desktop\Thesis_Final\Results\Weekday_CSV.csv"

    with open(Sat_CSV, 'w') as file2:
        csv_writer = csv.writer(file2, delimiter=',')
        file2.write('FID,Latitude,Longitude,DTW_Distance' + '\n')
        for line in Big_File_Sat:
            file2.write(''.join(str(line[0])) + ',' + ''.join(str(line[1])) + ',' +
                        ''.join(str(line[2])) + ',' + ''.join(str(line[3])) + ',' + '\n')

    with open(Weekday_CSV, 'w') as file3:
        csv_writer = csv.writer(file3, delimiter=',')
        file3.write('FID,Latitude,Longitude,DTW_Distance' + '\n')
        for line in Big_File_Week:
            file3.write(''.join(str(line[0])) + ',' + ''.join(str(line[1])) + ',' +
                        ''.join(str(line[2])) + ',' + ''.join(str(line[3])) + ',' + '\n')
def get_in_coding_rgn(input_dir, features):
    fimo_files = np.sort(np.array(glob.glob(os.path.join(input_dir, "fimo-outs/fimo-out-*"))))
    print('Number of fimo files = ', str(len(fimo_files)))

    # Get coding regions from features file (e.g. Escherichia_coli_K12_features)
    f = open(features, 'r')
    skip = 1
    line = f.readline()
    while 'header' not in line:
        line = f.readline()
        skip += 1
    f.close()

    features = pd.read_table(features, skiprows=skip)
    features = features[features.type != 'SEQ_END']
    start_pos = npstr.replace(features.start_pos.values.astype(str), '<', '').astype(int)
    end_pos = npstr.replace(features.end_pos.values.astype(str), '>', '').astype(int)

    total_coding_fracs = {}
    for f in fimo_files:
        print("Processing '%s'..." % f)
        fimo = None
        try:
            fimo = pd.read_table(f, sep='\t')
            is_bad = np.zeros(fimo.shape[0], dtype=bool)
            for i in range(fimo.shape[0]):
                row = fimo.iloc[i]
                hits = np.sum((features.contig == row['sequence name']) &
                              (start_pos <= row.start) & (end_pos >= row.start))
                if hits <= 0:
                    hits = np.sum((features.contig == row['sequence name']) &
                                  (start_pos <= row.stop) & (end_pos >= row.stop))
                if hits > 0:
                    is_bad[i] = True

            # write out fimo file with new column, now we save it to new subdirectory,
            # 'coding_fracs/'
            fimo['in_coding_rgn'] = is_bad
            ff = os.path.basename(f).replace('fimo-out-', '').replace('.bz2', '')
            coding_frac_f = os.path.join(input_dir, 'coding_fracs/' + 'coding-fracs-' + ff + '.tsv.bz2')
            fimo.to_csv(coding_frac_f, sep='\t', index=False, compression="bz2")

            if fimo.shape[0] <= 0:
                continue

            # write out summary for each motif in the run
            grpd = fimo.groupby('#pattern name').mean()
            mean_is_bad = grpd['in_coding_rgn'].values
            mot_ind = grpd.index.values
            mean_is_bad[mean_is_bad == True] = 1.0
            mean_is_bad[mean_is_bad == False] = 0.0
            for i in range(len(mean_is_bad)):
                total_coding_fracs[ff + '_' + str(mot_ind[i])] = round(mean_is_bad[i], 4)
        except:
            # This is catching a pandas.errors.EmptyDataError
            # However, older versions of pandas don't have this class
            print('SKIPPING -- cannot read fimo output')

    coding_fracs = pd.DataFrame({'motif': [a for a in sorted(total_coding_fracs.keys())],
                                 'coding_frac': [total_coding_fracs[a] for a in sorted(total_coding_fracs.keys())]})
    coding_fracs.to_csv(os.path.join(input_dir, "coding_fracs.tsv.bz2"), sep='\t', index=False, compression='bz2')
    return coding_fracs
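The bare except above exists because pandas.errors.EmptyDataError is not importable in very old pandas versions. Where a recent pandas can be assumed, a narrower handler is possible; the sketch below is only an illustration of that alternative (the helper name read_fimo_or_none is hypothetical, not part of the pipeline):

import pandas as pd

def read_fimo_or_none(path):
    """Return the FIMO table, or None if the file has no data rows."""
    try:
        return pd.read_table(path, sep='\t')
    except pd.errors.EmptyDataError:
        print('SKIPPING -- cannot read fimo output')
        return None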
def create_graph(file_path, tol_path, main_side, wet_test, x_axis_min, x_axis_max,
                 x2_axis_max, y_axis_max, data_start_index, ls_series, title, time_v_res):
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # LS data
    file_name = os.path.basename(file_path).split('.')[0]
    data = np.genfromtxt(file_path, delimiter=',', dtype=str, skip_header=data_start_index)
    data = np_f.replace(data, '"', '')  # the csv files have double quotes for some reason - these need to be removed
    data = data.astype(float)  # convert remaining data to float
    height_array = data[:, 1]
    min_height = np.amin(height_array)
    max_height = np.amax(height_array)
    mid_height = (max_height - min_height) / 2
    low_to_high_sweep = True if height_array[0] < mid_height else False
    if low_to_high_sweep:  # if sweep starts low and goes high, use this algorithm
        first_transition_index = int(np.argwhere(height_array > mid_height)[0])
        second_transition_index = int(np.argwhere(height_array[first_transition_index:] < mid_height - 10)[0]) + first_transition_index
        end_height_index = np.argmin(height_array[second_transition_index:(first_transition_index + second_transition_index)]) + second_transition_index
    else:  # if sweep starts high and goes low, use this algorithm
        first_transition_index = int(np.argwhere(height_array < mid_height)[0])
        second_transition_index = int(np.argwhere(height_array[first_transition_index:] > mid_height + 10)[0]) + first_transition_index
        end_height_index = np.argmax(height_array[second_transition_index:(first_transition_index + second_transition_index)]) + second_transition_index
    data = data[:end_height_index, :]

    show_tolerance = True if ls_series in ['MLS', 'E7x', 'F1x', 'F2x'] else False

    # Tolerance data
    if show_tolerance:
        if ls_series == 'MLS' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'MLS Tolerance (MS, wet).csv'
        if ls_series == 'MLS' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'MLS Tolerance (MS, dry).csv'
        if ls_series == 'MLS' and main_side == 'False' and wet_test == 'True':
            tol_file_name = 'MLS Tolerance (SS, wet).csv'
        if ls_series == 'MLS' and main_side == 'False' and wet_test == 'False':
            tol_file_name = 'MLS Tolerance (SS, dry).csv'
        if ls_series == 'E7x' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'E7x Tolerance (MS, wet).csv'
        if ls_series == 'F1x' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'F1x Tolerance (MS, wet).csv'
        if ls_series == 'F2x' and main_side == 'True' and wet_test == 'True':
            tol_file_name = 'F2x Tolerance (MS, wet).csv'
        if ls_series == 'E7x' and main_side == 'False' and wet_test == 'True':
            tol_file_name = 'E7x Tolerance (SS, wet).csv'
        if ls_series == 'F1x' and main_side == 'False' and wet_test == 'True':
            tol_file_name = 'F1x Tolerance (SS, wet).csv'
        if ls_series == 'E7x' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'E7x Tolerance (MS, dry).csv'
        if ls_series == 'F1x' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'F1x Tolerance (MS, dry).csv'
        if ls_series == 'F2x' and main_side == 'True' and wet_test == 'False':
            tol_file_name = 'F2x Tolerance (MS, dry).csv'
        if ls_series == 'E7x' and main_side == 'False' and wet_test == 'False':
            tol_file_name = 'E7x Tolerance (SS, dry).csv'
        if ls_series == 'F1x' and main_side == 'False' and wet_test == 'False':
            tol_file_name = 'F1x Tolerance (SS, dry).csv'
        tol_color = 'black' if main_side == 'True' else 'red'
        tol_file_path = os.path.join(tol_path, tol_file_name)
        tol_data = np.genfromtxt(tol_file_path, delimiter=',', dtype=str, skip_header=1)
        tol_data = np_f.replace(tol_data, '"', '')  # the csv files have double quotes for some reason - these need to be removed
        tol_data = tol_data.astype(float)  # convert remaining data to float

    if wet_test == 'True':
        time_axis_inc = 10
    else:
        time_axis_inc = 1

    time = data[:, 0]
    sys_height = data[:, 1]
    ls_ohms = data[:, 4]
    if low_to_high_sweep:
        min_indice = np.argmax(sys_height)
        empty_to_full = ls_ohms[:min_indice]
        full_to_empty = ls_ohms[min_indice:]
        empty_to_full_sys = sys_height[:min_indice]
        full_to_empty_sys = sys_height[min_indice:]
    else:
        min_indice = np.argmin(sys_height)
        empty_to_full = ls_ohms[min_indice:]
        full_to_empty = ls_ohms[:min_indice]
        empty_to_full_sys = sys_height[min_indice:]
        full_to_empty_sys = sys_height[:min_indice]

    plot_time_v_res = True if ls_series == 'MLS' or time_v_res == 'True' else False

    # height vs resistance
    ax.set_xlim(int(x_axis_min), int(x_axis_max))
    ax.set_ylim(0, float(y_axis_max))
    e_to_f_plot = ax.plot(empty_to_full_sys, empty_to_full, linewidth=0.5, label='R vs. H - fill', color='blue')
    f_to_e_plot = ax.plot(full_to_empty_sys, full_to_empty, linewidth=0.5, label='R vs. H - drain', color='magenta')
    ax.set_xlabel('Height / mm', fontsize=7)
    ax.set_ylabel(r'Resistance / $\Omega$', fontsize=7)
    start, end = ax.get_xlim()
    if plot_time_v_res:
        ax.xaxis.set_ticks(np.arange(int(start), end + 10, 10))
    else:
        increment = roundup((end - start) / 25)
        ax.xaxis.set_ticks(np.arange(int(start), end, 50))
    lns = e_to_f_plot + f_to_e_plot

    # time vs resistance
    if plot_time_v_res:
        ax2 = ax.twiny()
        ax2.set_xlim(0, int(x2_axis_max))
        #ax2.set_ylim([0, float(y_axis_max)])
        r_vs_t = ax2.plot(time, ls_ohms, linewidth=0.5, label='Resistance vs. Time', color='orange')
        ax2.set_xlabel('Time / s', fontsize=7)
        start, end = ax2.get_xlim()
        ax2.xaxis.set_ticks(np.arange(start, end, time_axis_inc))
        lns = lns + r_vs_t
        ax2.tick_params(labelsize=5)

    # tolerance bands
    if show_tolerance:
        low_tol_plot = ax.plot(tol_data[:, 0], tol_data[:, 1], linewidth=0.5, color=tol_color, label='Tolerance', linestyle=':')
        up_tol_plot = ax.plot(tol_data[:, 2], tol_data[:, 3], linewidth=0.5, color=tol_color, linestyle=':')
        lns = lns + low_tol_plot

    # create legend
    labs = [l.get_label() for l in lns]
    ax.legend(lns, labs, loc=3, fontsize='x-small')

    ax.tick_params(labelsize=5)
    ax.grid(linewidth=0.1)
    if plot_time_v_res:
        ax.set_title(title, fontsize=10, y=1.08)  # this raises the title to fit the top x-axis
    else:
        ax.set_title(title, fontsize=10)
    start, end = ax.get_ylim()
    ax.yaxis.set_ticks(np.arange(0, end, 50))

    if plot_time_v_res:
        fig.savefig(file_path.replace('.csv', '.png'), dpi=1000)
    else:
        make_pdf(file_path)
    #plt.show()
    plt.close(fig)  # must close figure, or there will be a memory error when running batch_graph_creator.py
def run():
    parser = argparse.ArgumentParser(description="Examples: \n" +
        "calc_spectra data/vega.pkl data/vega/ -i 0.000 1.5707963267948966 150; " +
        "calc_spectra data/vega.pkl data/vega/ -i 0.088418; " +
        "calc_spectra data/altair.pkl data/altair/ -i 0.8840; " +
        "calc_spectra data/achernar.pkl data/achernar/ -i 1.0577")
    parser.add_argument("pkl_sfile", help="the pickled star file")
    parser.add_argument("output", help="the output directory")
    parser.add_argument('-i', type=float, nargs='+', required=True,
                        help='either a single inclination in radians ' +
                             'or equally spaced values specified by minimum, maximum and number')
    parser.add_argument("-m", type=int, default=0,
                        help="longitudinal integration method: 0=cubic(default), 1=trapezoidal")
    args = parser.parse_args()

    ## inputs
    pkl_sfile = args.pkl_sfile  # pickled star file
    output = args.output  # output location

    # integration method
    if args.m == 0:
        m = 'cubic'
    elif args.m == 1:
        m = 'trapezoid'
    else:
        sys.exit("Longitudinal integration method should be either 0 (cubic) or 1 (trapezoidal).")

    # inclinations
    i = args.i
    li = len(i)
    if li not in [1, 3]:
        sys.exit("Please specify either a single inclination in radians (one number) " +
                 "or a range specified by minimum, maximum and step (three numbers).")
    elif li == 1:
        inclinations = np.array(i)
        # decimal precision of inclination for printout
        prec = 6
    elif li == 3:
        mi, ma, num = i
        inclinations = np.linspace(mi, ma, num=int(num))
        # decimal precision of inclination for printout
        prec = int(np.ceil(-np.log10((ma - mi) / num)))
    leni = len(inclinations)

    # unpickle the star
    with open(pkl_sfile, 'rb') as f:
        st = pickle.load(f)
    # get the wavelengths at which we see light from this star
    wl = st.wavelengths

    ## write the spectra of the star in text format
    # create the directory if it doesn't exist
    if not os.path.exists(output):
        os.mkdir(output)
    # filenames
    if not output.endswith('/'):
        output += '/'
    filename = os.path.splitext(os.path.basename(pkl_sfile))[0]
    inc_str = np.array([("%." + str(prec) + "f") % x for x in np.round(inclinations, decimals=prec)])
    ofiles = ch.add(output + filename, inc_str)
    ofiles = ch.replace(ofiles, '.', '_')
    ofiles = ch.add(ofiles, '.txt')

    for i, ofile in np.ndenumerate(ofiles):
        # message
        if i[0] % 10 == 0:
            print(str(i[0]) + " out of " + str(leni) + " inclinations calculated.")
            sys.stdout.flush()
        # current inclination
        inc = inclinations[i]
        # calculate the spectrum or the magnitudes
        light = st.integrate(inc, method=m)
        # create this file if it doesn't exist, open it for writing
        f = open(ofile, 'w+')
        # write the header
        f.write('# luminosity: ' + str(st.luminosity) + '\n')
        f.write('# omega: ' + str(st.surface.omega) + '\n')
        f.write('# inclination(rad): ' + str(inclinations[i]) + '\n')
        f.write('# mass: ' + str(st.mass) + '\n')
        f.write('# Req: ' + str(st.Req) + '\n')
        f.write('# distance: ' + format(st.distance, '.2e') + ' cm\n')
        f.write('# A_V: ' + format(*(st.a_v), '.2f') + '\n')
        f.write('# number of upper half z values: ' + str(st.map.nz) + '\n')
        # write the spectrum to the file
        f.write('\n')
        if st.bands is None:  # spectrum mode
            f.write('# wavelength(nm)\tflux(ergs/s/Hz/ster)\n')
            for j, w in np.ndenumerate(wl):
                f.write(str(w))
                f.write('\t %.5E' % light[j])
                f.write('\n')
        else:  # photometry mode
            f.write('# filter\twavelength(nm)\tmagnitude\n')
            for j, w in enumerate(wl):
                f.write(st.bands[j])
                f.write('\t %.6g' % w)
                f.write('\t %.8f' % light[j])
                f.write('\n')
        f.close()
rank_attr
top50

#######################################################
#### LGBM
#######################################################

## Test 1: Normal
dataset_y = accidents_train['Accident_Severity']
dataset_x = accidents_train.drop(['Accident_Severity'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(dataset_x, dataset_y, test_size=0.20, random_state=42)

col = np.array(X_train.columns, dtype=str)
col = np_f.replace(col, ':', '=')
X_train.columns = col
X_test.columns = col

# 1) LGBM normal
print('LGBM - Normal')
model = lgb.LGBMClassifier(random_state=42)
model.fit(X_train, y_train, eval_metric='multi_logloss', eval_set=[(X_test, y_test)], early_stopping_rounds=50)
y_probas = model.predict_proba(X_test)
roc = roc_auc_score(np.where(y_test == 1, 1, 0), y_probas[:, 0])
skplt.metrics.plot_roc_curve(y_test, y_probas)
plt.show()
saveResults[2][0] = roc