def pd_read_csv_data_from_dir(self, input_dir, extension_str='.csv'):
    '''
    Load every file reported by FileList.find_file into one DataFrame.

    Input:
        input_dir: directory handed to FileList.find_file, e.g. '/input/dir'
        extension_str: file extension handed to FileList.find_file
    Output:
        result_df: pandas DataFrame with the rows of all files, stacked in
            the order find_file returned them and re-indexed 0..n-1.
    Raises:
        ValueError: if find_file returned no files.
    '''
    file_ary = FileList().find_file(input_dir, extension_str)
    df_list = [self.pd_read_csv_data(temp_file) for temp_file in file_ary]

    if not df_list:
        # pd.concat([]) raises a bare "No objects to concatenate"; keep the
        # exception type but tell the caller which directory was empty.
        raise ValueError("No '{}' files found in '{}'".format(
            extension_str, input_dir))

    # ignore_index=True renumbers the rows 0..n-1, replacing the per-file
    # indexes that would otherwise repeat in the combined frame.
    return pd.concat(df_list, axis=0, ignore_index=True)
def __init__(self, game):
    """Set up the stage: item sources, sprites, selection state and key bindings."""
    Group.__init__(self)

    self.game = game
    self.need_draw = True
    self.key_test_period = 0.25
    self.__tick = 0

    # --- item sources -----------------------------------------------------
    self.rom_executor = RomExecutor()
    paths = game.app.config
    self.game_list = RomDataItemsConstructor(paths.get("PATHS", "gamelist"))
    self.sdcard_constructor = DirlistItemConstructor(
        paths.get("PATHS", "sdcard"), Executor())

    # One entry per platform slot: favorites first, then one per console,
    # then the sd-card browser; the last slot has no constructor.
    console_sources = [
        self.game_list.getConsole(console_id)
        for console_id in ("GEN", "SMS", "NES", "SNES")
    ]
    self.all_constructors = (
        [BaseItemConstructor()] + console_sources +
        [self.sdcard_constructor, None])
    self.item_constructor = self.all_constructors[0]

    # --- sprites ----------------------------------------------------------
    self.bg = Background(game)
    self.title_text = TextSprite("", game.assets["TITLE_FONT"])
    first_icon = self.game.assets["ICONS"][0]
    self.update_title_text(first_icon['title'])
    self.title_text.centered = True
    self.platform = MainMenuPlatformList(game)
    self.file_list = FileList(game, self.item_constructor)
    self.file_list.deselect_all()
    self.selector_state = MainStage.SELECTRO_ICONS

    # --- key bindings -----------------------------------------------------
    key_bindings = (
        (input.Input.EVENT_DOWN, self.nextItem),
        (input.Input.EVENT_UP, self.lastItem),
        (input.Input.EVENT_NEXT, self.select),
        (input.Input.EVENT_BACK, self.selectBack),
        (input.Input.EVENT_LEFT, self.last10Item_list),
        (input.Input.EVENT_RIGHT, self.next10Item_list),
    )
    for event_id, handler in key_bindings:
        game.app.input.addEvent(event_id, handler)

    # NOTE: the sprites are tracked in self.parts instead of being
    # registered through Group.add().
    self.parts = [self.title_text, self.platform, self.file_list]
def get_csv_data_from_dir(self, input_dir, outcome_index=26, extension_str='.csv'):
    '''
    Read every file reported by FileList.find_file and stack the results.

    Input:
        input_dir: directory handed to FileList.find_file, e.g. '/input/dir'
        outcome_index: forwarded unchanged to self.read_csv_data
        extension_str: file extension handed to FileList.find_file
    Output:
        features_ary: header array returned by read_csv_data for the LAST
            file read (empty np.array when no file was found)
        data_x = np.array, rows of all files concatenated along axis 0
        data_y = np.array, outcomes of all files concatenated along axis 0
    '''
    file_ary = FileList().find_file(input_dir, extension_str)

    # Defined up front so that an empty directory yields empty arrays
    # instead of an UnboundLocalError at the return statement.
    features_ary = np.array([])
    x_parts = []
    y_parts = []
    total_rows = 0

    for temp_file in file_ary:
        features_ary, temp_data_x, temp_data_y = self.read_csv_data(
            temp_file, outcome_index)
        x_parts.append(temp_data_x)
        y_parts.append(temp_data_y)
        total_rows += len(temp_data_x)
        # Progress output: file, rows in this file, rows accumulated so far.
        print(temp_file)
        print(len(temp_data_x))
        print(total_rows)

    if not x_parts:
        return features_ary, np.array([]), np.array([])
    if len(x_parts) == 1:
        # A single file is handed back exactly as read_csv_data produced it.
        return features_ary, x_parts[0], y_parts[0]

    # Concatenate once at the end rather than re-copying the growing
    # array on every iteration.
    data_x = np.concatenate(x_parts, axis=0)
    data_y = np.concatenate(y_parts, axis=0)
    return features_ary, data_x, data_y
def _fill_content(self):
    """Build the window body: a file list pane next to the notebook widget."""
    pane = gtk.HPaned()
    pane.set_position(200)
    self.main_vbox.pack_start(pane, expand=True, fill=True)

    # First pane child: scrollable file list for the notebook.
    scroller = gtk.ScrolledWindow()
    scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
    pane.pack1(scroller, resize=False)

    self.__file_list = FileList(self.notebook)
    scroller.add(self.__file_list)
    self.__file_list.connect('open-file', self.on_file_list_open_file)

    # Second pane child: the notebook tabs, which take any extra space.
    pane.pack2(self.nb_widget, resize=True)
    self.nb_widget.set_scrollable(True)
def _fill_content(self):
    """Build the window body: a file list pane next to the notebook widget."""
    pane = gtk.HPaned()

    # A stored position of -1 falls back to a 200 px divider.
    saved_position = self.state.get_pane_position()
    pane.set_position(200 if saved_position == -1 else saved_position)
    pane.connect('notify::position', self.on_hpaned_notify_position)
    self.main_vbox.pack_start(pane, expand=True, fill=True)

    # First pane child: scrollable file list for the notebook.
    scroller = gtk.ScrolledWindow()
    scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
    pane.pack1(scroller, resize=False)

    self.__file_list = FileList(self.notebook)
    scroller.add(self.__file_list)
    file_list_handlers = (
        ('open-file', self.on_file_list_open_file),
        ('close-file', self.on_file_list_close_file),
        ('rename-file', self.on_file_list_rename_file),
        ('delete-file', self.on_file_list_delete_file),
    )
    for signal_name, handler in file_list_handlers:
        self.__file_list.connect(signal_name, handler)

    # Second pane child: the notebook tabs, which take any extra space.
    pane.pack2(self.nb_widget, resize=True)
    self.nb_widget.set_scrollable(True)
def change_path(self, path=None, selected=None):
    """Change file list path, redirecting non-trash paths to 'trash:///'."""
    # None and anything already under 'trash://' are passed through as-is.
    if path is None or path.startswith('trash://'):
        target = path
    else:
        target = 'trash:///'
    FileList.change_path(self, target, selected)
def gpr_file_test():
    """Cross-check GPR files found on disk against the ids in the Excel summary.

    Writes three text files:
      * gpr_file_list.txt     - every GPR file path found, one per line
      * match_file_list.txt   - "<id>\\t<path>" for each Excel id that has a GPR file
      * missing_file_list.txt - each Excel id with no GPR file
    and prints the missing ids plus hit / miss / total counts.
    """
    file_path_ary = [
        '/home/ryan/smb_data/CytoOneArray/RD/完成報告',
        '/home/ryan/smb_data/CytoOneArray/RD/審查中報告',
        '/home/ryan/smb_data/brank_data/For Brank/GPR'
    ]
    file_ext = 'gpr'
    gpr_file_list = '/home/ryan/src_dir/CytoOA_AI/data/gpr_file_list.txt'
    missing_file_list = '/home/ryan/src_dir/CytoOA_AI/data/missing_file_list.txt'
    match_file_list = '/home/ryan/src_dir/CytoOA_AI/data/match_file_list.txt'
    input_file = '/home/ryan/src_dir/CytoOA_AI/data/Cyto_Report_summary2.xlsx'

    file_obj = FileList()
    file_ary = []
    for file_path in file_path_ary:
        file_ary += file_obj.find_file(file_path, file_ext)

    all_id = []
    array_id_2_path_dict = {}
    # The paths contain non-ASCII characters, so write UTF-8 explicitly
    # instead of relying on the locale's default encoding. The context
    # manager closes the file even if get_gpr_code raises.
    with open(gpr_file_list, 'w', encoding='utf-8') as fh_writer:
        for temp_file in file_ary:
            fh_writer.write(temp_file + "\n")
            file_name = file_obj.get_gpr_code(temp_file)
            # Record array id -> file path. The first id of each file keeps
            # the original "last file wins" rule.
            array_id_2_path_dict[file_name[0]] = temp_file
            # Every further id is also counted as a hit below, so it needs
            # a path entry too, otherwise the match lookup raises KeyError.
            for extra_id in file_name[1:]:
                array_id_2_path_dict.setdefault(extra_id, temp_file)
            all_id += file_name
    gpr_id_dict = list_2_dict(all_id)

    excel_reader = ExcelReader()
    excel_df = excel_reader.read_excel(input_file)
    excel_gpr_id = get_gpr_id_from_excel_df(excel_df)
    excel_grp_dict = list_2_dict(excel_gpr_id)

    hit_count = 0
    miss_count = 0
    total_count = 0
    miss_id = []
    with open(missing_file_list, 'w', encoding='utf-8') as fh_missing, \
            open(match_file_list, 'w', encoding='utf-8') as fh_match:
        for key in excel_grp_dict:
            if key in gpr_id_dict:
                hit_count += 1
                fh_match.write(
                    str(key) + "\t" + array_id_2_path_dict[key] + "\n")
            else:
                miss_count += 1
                miss_id.append(key)
                fh_missing.write(str(key) + "\n")
            total_count += 1

    print("Missing id = ")
    print(miss_id)
    print("Hit count = {}".format(hit_count))
    print("Miss_count = {}".format(miss_count))
    print("Total_count = {}".format(total_count))
class MainStage(Group):
    """Main menu stage: a platform icon selector plus a per-platform file list.

    The stage is in one of two selector states. In SELECTRO_ICONS the
    up/down keys move the platform selection; in SELECTOR_LIST they move
    the selection inside the file list.
    """

    app = None
    # Selector state: navigating the platform icons.
    SELECTRO_ICONS = "icons"
    # Selector state: navigating the file list of the chosen platform.
    SELECTOR_LIST = "list"

    def __init__(self, game):
        """Create item sources and sprites, and register the input handlers."""
        Group.__init__(self)
        self.game = game
        self.need_draw = True
        # Interval between held-key checks in update(), in the units of dt.
        self.key_test_period = 0.25
        self.__tick = 0
        self.rom_executor = RomExecutor()
        self.game_list = RomDataItemsConstructor(
            game.app.config.get("PATHS", "gamelist"))
        self.sdcard_constructor = DirlistItemConstructor(
            game.app.config.get("PATHS", "sdcard"), Executor())
        # Indexed by self.platform.selected in lastNextItem().
        self.all_constructors = [
            BaseItemConstructor(),  # for favorites
            self.game_list.getConsole("GEN"),  # for gen
            self.game_list.getConsole("SMS"),  # for sms
            self.game_list.getConsole("NES"),  # for nes
            self.game_list.getConsole("SNES"),  # for snes
            self.sdcard_constructor,
            None
        ]
        self.item_constructor = self.all_constructors[0]
        self.bg = Background(game)
        self.title_text = TextSprite("", game.assets["TITLE_FONT"])
        self.update_title_text(self.game.assets["ICONS"][0]['title'])
        self.title_text.centered = True
        self.platform = MainMenuPlatformList(game)
        self.file_list = FileList(game, self.item_constructor)
        self.file_list.deselect_all()
        self.selector_state = MainStage.SELECTRO_ICONS
        #self.add(*[self.bg, self.title_text, self.platform, self.file_list])
        game.app.input.addEvent(input.Input.EVENT_DOWN, self.nextItem)
        game.app.input.addEvent(input.Input.EVENT_UP, self.lastItem)
        game.app.input.addEvent(input.Input.EVENT_NEXT, self.select)
        game.app.input.addEvent(input.Input.EVENT_BACK, self.selectBack)
        game.app.input.addEvent(input.Input.EVENT_LEFT, self.last10Item_list)
        game.app.input.addEvent(input.Input.EVENT_RIGHT, self.next10Item_list)
        # Sprites that draw() redraws individually when they are dirty.
        self.parts = [self.title_text, self.platform, self.file_list]
    # end of init

    def next10Item_list(self):
        """In list mode, move the selection to the first item of the next page."""
        if (self.selector_state != MainStage.SELECTOR_LIST):
            return
        self.file_list.selected = (
            1 + self.file_list.selected // self.file_list.ITEMS_PER_PAGE) * self.file_list.ITEMS_PER_PAGE

    def last10Item_list(self):
        """In list mode, move the selection to the first item of the previous page."""
        if (self.selector_state != MainStage.SELECTOR_LIST):
            return
        # NOTE(review): on the first page this assigns a negative index;
        # presumably FileList.selected clamps it - confirm.
        self.file_list.selected = (
            -1 + self.file_list.selected // self.file_list.ITEMS_PER_PAGE) * self.file_list.ITEMS_PER_PAGE

    def nextItem(self):
        """Move the current selection one step forward."""
        self.lastNextItem(1)
    # end of nextItem

    def lastItem(self):
        """Move the current selection one step back."""
        self.lastNextItem(-1)

    def lastNextItem(self, dir):
        """Move the selection by `dir` in whichever selector is active.

        In icon mode this also updates the title and reloads the file list
        from the constructor that belongs to the newly selected platform.
        """
        # Restart the key-repeat timer used by update().
        self.__tick = 0
        if (self.selector_state == MainStage.SELECTRO_ICONS):
            self.platform.selected += dir
            _title = self.game.assets["ICONS"][self.platform.selected]['title']
            self.update_title_text(_title)
            self.item_constructor = self.all_constructors[
                self.platform.selected]
            # Second argument is False only for the directory browser.
            self.file_list.set_items(
                self.item_constructor,
                not isinstance(self.item_constructor, DirlistItemConstructor))
            self.file_list.deselect_all()
        else:
            self.file_list.selected += dir
    # end of lastNextItem

    def select(self):
        """Confirm: enter the file list, or activate the selected list item."""
        if (self.selector_state == MainStage.SELECTRO_ICONS):
            self.selector_state = MainStage.SELECTOR_LIST
            self.file_list.selected = 0
            return
        if (self.selector_state == MainStage.SELECTOR_LIST):
            if (isinstance(self.item_constructor, DirlistItemConstructor)):
                # Directory browser: reload the list when next() reports
                # a change.
                if self.item_constructor.next(self.file_list.selected):
                    self.file_list.set_items(self.item_constructor)
            else:
                # Any other source: run the selected item via the ROM executor.
                rom = self.item_constructor.all[self.file_list.selected]
                print(self.rom_executor.exec(rom))
    # end of select

    def selectBack(self):
        """Leave the file list and return to the platform icons."""
        if (self.selector_state == MainStage.SELECTOR_LIST):
            self.selector_state = MainStage.SELECTRO_ICONS
            self.file_list.deselect_all()
    # end of selectBack

    def update_title_text(self, text):
        """Set the title text and re-centre it around the point (455, 70)."""
        self.title_text.set_text(text)
        self.title_text.pos = [
            455 - self.title_text.rect.w / 2, 70 - self.title_text.rect.h / 2
        ]
    # end of update_title_text

    def update(self, dt):
        """Advance the stage by dt and repeat up/down moves while a key is held."""
        Group.update(self, dt)
        self.platform.update(dt)
        self.__tick += dt
        # Once per key_test_period, re-fire the move for a held key.
        if (self.__tick >= self.key_test_period):
            self.__tick = 0
            if (self.game.app.input.keys[input.Input.EVENT_UP]):
                self.lastItem()
            if (self.game.app.input.keys[input.Input.EVENT_DOWN]):
                self.nextItem()
    # end of update

    def draw(self, renderer):
        """Redraw whatever is dirty; return True if any part was redrawn."""
        #Group.draw(self, renderer)
        # The background is drawn in full only while need_draw is set.
        if (self.need_draw):
            self.bg.draw(renderer)
            self.need_draw = False
        _updated = False
        for p in self.parts:
            if (p.need_draw):
                # Blit the background over the part's previous rectangle(s)
                # before drawing it again (presumably to erase the old image).
                rs = p.last_rect
                if (not isinstance(rs, list)):
                    rs = [rs]
                for r in rs:
                    renderer.blit(self.bg.image, r, r)
                p.draw(renderer)
                _updated = True
        return _updated
class NotebookWindow(BaseNotebookWindow):
    """Notebook window with a file list pane on the left and editor tabs on the right."""

    UI_STRING=""" <ui> <menubar name="TopMenu"> <menu action="file"> <menuitem action="new-notebook"/> <menuitem action="open-notebook"/> <menuitem action="notebook-properties"/> <separator/> <menuitem action="new-worksheet"/> <menuitem action="new-library"/> <menuitem action="open"/> <menuitem action="save"/> <menuitem action="rename"/> <separator/> <menuitem action="page-setup"/> <menuitem action="print"/> <menuitem action="export-to-pdf"/> <separator/> <menuitem action="close"/> <menuitem action="quit"/> </menu> <menu action="edit"> <menuitem action="cut"/> <menuitem action="copy"/> <menuitem action="copy-as-doctests"/> <menuitem action="paste"/> <menuitem action="delete"/> <separator/> <menuitem action="calculate"/> <menuitem action="calculate-to-line"/> <menuitem action="break"/> <separator/> <menuitem action="calculate-all"/> <separator/> <menuitem action="preferences"/> </menu> <menu action="help"> <menuitem action="online-documentation"/> <separator/> <menuitem action="about"/> </menu> </menubar> <toolbar name="ToolBar"> <toolitem action="save"/> <separator/> <toolitem action="calculate"/> <toolitem action="break"/> </toolbar> <accelerator action="switch-tab-1"/> <accelerator action="switch-tab-2"/> <accelerator action="switch-tab-3"/> <accelerator action="switch-tab-4"/> <accelerator action="switch-tab-5"/> <accelerator action="switch-tab-6"/> <accelerator action="switch-tab-7"/> <accelerator action="switch-tab-8"/> <accelerator action="switch-tab-9"/> <accelerator action="switch-tab-10"/> </ui> """

    def __init__(self, notebook):
        """Create the window for `notebook` with an 800x800 default size."""
        BaseNotebookWindow.__init__(self, notebook)

        self.window.set_default_size(800, 800)

    #######################################################
    # Overrides
    #######################################################

    def _fill_content(self):
        """Build the paned layout: scrollable file list left, notebook tabs right."""
        hpaned = gtk.HPaned()
        # A stored position of -1 falls back to a 200 px divider.
        position = self.state.get_pane_position()
        if position == -1:
            hpaned.set_position(200)
        else:
            hpaned.set_position(position)
        hpaned.connect('notify::position', self.on_hpaned_notify_position)
        self.main_vbox.pack_start(hpaned, expand=True, fill=True)

        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        hpaned.pack1(scrolled_window, resize=False)

        self.__file_list = FileList(self.notebook)
        scrolled_window.add(self.__file_list)
        self.__file_list.connect('open-file', self.on_file_list_open_file)
        self.__file_list.connect('close-file', self.on_file_list_close_file)
        self.__file_list.connect('rename-file', self.on_file_list_rename_file)
        self.__file_list.connect('delete-file', self.on_file_list_delete_file)

        hpaned.pack2(self.nb_widget, resize=True)

        self.nb_widget.set_scrollable(True)

    def _add_editor(self, editor):
        """Add `editor` as a tab whose label has a status icon, title and close button."""
        # Set first since _add_editor() calls _update_editor_title()
        editor._notebook_tab_label = gtk.Label()
        editor._notebook_tab_status = gtk.Image()
        editor._notebook_tab_status.props.icon_size = gtk.ICON_SIZE_MENU
        BaseNotebookWindow._add_editor(self, editor)

        label_widget = gtk.HBox(False, 4)
        label_widget.pack_start(editor._notebook_tab_status, True, True, 0)
        label_widget.pack_start(editor._notebook_tab_label, True, True, 0)
        tab_button = gtk.Button()
        tab_button.set_name('notebook-close-button')
        tab_button.set_relief(gtk.RELIEF_NONE)
        tab_button.props.can_focus = False
        tab_button.connect('clicked', lambda *args: self.on_tab_close_button_clicked(editor))
        label_widget.pack_start(tab_button, False, False, 0)
        close = gtk.image_new_from_stock('gtk-close', gtk.ICON_SIZE_MENU)
        tab_button.add(close)
        label_widget.show_all()

        self.nb_widget.set_tab_label(editor.widget, label_widget)

        self.nb_widget.set_tab_reorderable(editor.widget, True)

    def _update_editor_title(self, editor):
        """Mirror the editor's title into its tab label."""
        BaseNotebookWindow._update_editor_title(self, editor)
        editor._notebook_tab_label.set_text(editor.title)

    def _update_editor_state(self, editor):
        """Mirror the editor's state into its tab status icon."""
        BaseNotebookWindow._update_editor_state(self, editor)
        editor._notebook_tab_status.props.stock = NotebookFile.stock_id_for_state(editor.state)

    #######################################################
    # Callbacks
    #######################################################

    def on_tab_close_button_clicked(self, editor):
        """Close the editor whose tab close button was clicked."""
        self._close_editor(editor)

    def on_file_list_open_file(self, file_list, file):
        """Open `file` when the file list asks for it."""
        self.open_file(file)

    def on_file_list_close_file(self, file_list, file):
        """Close every editor that is showing `file`."""
        # Iterate over a snapshot: closing an editor may remove it from
        # self.editors, and mutating a list while iterating it skips items.
        for editor in list(self.editors):
            if editor.file == file:
                self._close_editor(editor)

    def on_file_list_rename_file(self, file_list, file):
        """Rename `file`, through its editor if it is open, else directly on disk."""
        if file.active:
            # If we have the file open, we need to rename via the editor
            for editor in list(self.editors):
                if editor.file == file:
                    editor.rename()
                    # Reselect the new item in the list. This is done for
                    # the editor that matched; using the loop variable after
                    # the loop would pick the last editor, not this one.
                    new_file = self.notebook.file_for_absolute_path(editor.filename)
                    file_list.select_file(new_file)
        else:
            # Otherwise do it directly
            def check_name(name):
                return name != "" and name != file.path

            def do_rename(new_path):
                old_path = os.path.join(self.notebook.folder, file.path)
                os.rename(old_path, new_path)
                self.notebook.refresh()
                # Reselect the new item in the list
                new_file = self.notebook.file_for_absolute_path(new_path)
                file_list.select_file(new_file)

            # Pick the extension and name validator that match the file type.
            if isinstance(file, WorksheetFile):
                extension = "rws"
                validate_name = WorksheetEditor.validate_name
            elif isinstance(file, LibraryFile):
                extension = "py"
                validate_name = LibraryEditor.validate_name
            else:
                extension = ""
                validate_name = None

            title = "Rename '%s'" % file.path
            builder = SaveFileBuilder(title, file.path, "Rename", validate_name, check_name)
            builder.dialog.set_transient_for(self.window)
            builder.name_entry.set_text(file.path)
            builder.prompt_for_name(self.notebook.folder, extension, do_rename)
            builder.dialog.destroy()

    def on_file_list_delete_file(self, file_list, file):
        """Ask for confirmation, then close any editor on `file` and delete it from disk."""
        dialog = gtk.MessageDialog(parent=self.window, buttons=gtk.BUTTONS_NONE,
                                   type=gtk.MESSAGE_WARNING)
        message = format_escaped("<big><b>Really delete '%s'?</b></big>", file.path)
        dialog.set_markup(message)

        dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                           gtk.STOCK_DELETE, gtk.RESPONSE_OK)
        # Cancel is the default so that a stray Enter does not delete.
        dialog.set_default_response(gtk.RESPONSE_CANCEL)
        response = dialog.run()
        dialog.destroy()
        if response != gtk.RESPONSE_OK:
            return

        # Snapshot for the same reason as in on_file_list_close_file.
        for editor in list(self.editors):
            if editor.file == file:
                self._close_editor(editor)

        abspath = os.path.join(self.notebook.folder, file.path)
        os.remove(abspath)
        self.notebook.refresh()

    def on_hpaned_notify_position(self, pane, gparamspec):
        """Persist the divider position whenever the pane reports a change."""
        self.state.set_pane_position(pane.get_property('position'))
def __init__(self):
    """Create the FileList helper kept on this instance as file_obj."""
    self.file_obj = FileList()
def __init__(self, parent, notebook, options):
    """Initialise the list by delegating to FileList.__init__ with the same arguments."""
    # Explicit base-class call; no extra state is set up here.
    FileList.__init__(self, parent, notebook, options)
class NotebookWindow(BaseNotebookWindow):
    """Notebook window with a file list pane on the left and editor tabs on the right."""

    UI_STRING=""" <ui> <menubar name="TopMenu"> <menu action="file"> <menuitem action="new-notebook"/> <menuitem action="open-notebook"/> <menuitem action="notebook-properties"/> <separator/> <menuitem action="new-worksheet"/> <menuitem action="new-library"/> <menuitem action="open"/> <menuitem action="save"/> <menuitem action="rename"/> <menuitem action="close"/> <separator/> <menuitem action="quit"/> </menu> <menu action="edit"> <menuitem action="cut"/> <menuitem action="copy"/> <menuitem action="copy-as-doctests"/> <menuitem action="paste"/> <menuitem action="delete"/> <separator/> <menuitem action="calculate"/> <menuitem action="break"/> <separator/> <menuitem action="calculate-all"/> </menu> <menu action="help"> <menuitem action="about"/> </menu> </menubar> <toolbar name="ToolBar"> <toolitem action="save"/> <separator/> <toolitem action="calculate"/> <toolitem action="break"/> </toolbar> </ui> """

    def __init__(self, notebook):
        """Create the window for `notebook` with an 800x800 default size."""
        BaseNotebookWindow.__init__(self, notebook)
        self.window.set_default_size(800, 800)

    #######################################################
    # Overrides
    #######################################################

    def _fill_content(self):
        """Build the paned layout: scrollable file list left, notebook tabs right."""
        pane = gtk.HPaned()
        pane.set_position(200)
        self.main_vbox.pack_start(pane, expand=True, fill=True)

        # First pane child: scrollable file list for the notebook.
        scroller = gtk.ScrolledWindow()
        scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        pane.pack1(scroller, resize=False)

        self.__file_list = FileList(self.notebook)
        scroller.add(self.__file_list)
        self.__file_list.connect('open-file', self.on_file_list_open_file)

        # Second pane child: the notebook tabs, which take any extra space.
        pane.pack2(self.nb_widget, resize=True)
        self.nb_widget.set_scrollable(True)

    def _add_editor(self, editor):
        """Add `editor` as a tab whose label has a status icon, title and close button."""
        # The tab widgets must exist before the base class runs, because
        # its _add_editor() calls _update_editor_title().
        title_label = gtk.Label()
        status_icon = gtk.Image()
        editor._notebook_tab_label = title_label
        editor._notebook_tab_status = status_icon
        status_icon.props.icon_size = gtk.ICON_SIZE_MENU
        BaseNotebookWindow._add_editor(self, editor)

        def _on_close_clicked(*args):
            return self.on_tab_close_button_clicked(editor)

        close_button = gtk.Button()
        close_button.set_name('notebook-close-button')
        close_button.set_relief(gtk.RELIEF_NONE)
        close_button.props.can_focus = False
        close_button.connect('clicked', _on_close_clicked)
        close_button.add(
            gtk.image_new_from_stock('gtk-close', gtk.ICON_SIZE_MENU))

        # Icon and title share the spare width; the button keeps its
        # natural size.
        tab_box = gtk.HBox(False, 4)
        for child, stretch in ((editor._notebook_tab_status, True),
                               (editor._notebook_tab_label, True),
                               (close_button, False)):
            tab_box.pack_start(child, stretch, stretch, 0)
        tab_box.show_all()

        page = editor.widget
        self.nb_widget.set_tab_label(page, tab_box)
        self.nb_widget.set_tab_reorderable(page, True)

    def _update_editor_title(self, editor):
        """Mirror the editor's title into its tab label."""
        BaseNotebookWindow._update_editor_title(self, editor)
        tab_label = editor._notebook_tab_label
        tab_label.set_text(editor.title)

    def _update_editor_state(self, editor):
        """Mirror the editor's state into its tab status icon."""
        BaseNotebookWindow._update_editor_state(self, editor)
        stock_id = NotebookFile.stock_id_for_state(editor.state)
        editor._notebook_tab_status.props.stock = stock_id

    #######################################################
    # Callbacks
    #######################################################

    def on_tab_close_button_clicked(self, editor):
        """Close the editor whose tab close button was clicked."""
        self._close_editor(editor)

    def on_file_list_open_file(self, file_list, file):
        """Open `file` when the file list asks for it."""
        self.open_file(file)
def change_path(self, path=None, selected=None):
    """Change file list path, redirecting non-trash paths to the provider's root."""
    # None and anything starting with 'trash:' are passed through as-is.
    if path is None or path.startswith('trash:'):
        target = path
    else:
        target = self.get_provider().get_root_path(None)
    FileList.change_path(self, target, selected)
class DataReader(object): def __init__(self): self.file_obj = FileList() pass ## print(tf.__version__) def build_array_with_cnv_to_gene(self, cnv_df, array_2_gene, gene_2_array, array_id_key='Array_ID'): ''' Function: 只保留R分析後,有CNV的資料。 r analysis cnv result to gene symbol. ''' print("In build_array_with_cnv_to_gene ...") # probe_id_list = list(cnv_df.columns.values) cnv_ary = cnv_df.to_dict(orient='records') # for temp_index,row in cnv_df.iterrows(): result_ary = [] total_count = 0 hit_count = 0 no_hit_count = 0 for cnv_dict in cnv_ary: # print(cnv_dict) gene_cnv_dict = {} array_id = cnv_dict[array_id_key] gene_cnv_dict[array_id_key] = array_id sample_hit_gene_dict = {} ### sample with cnv if array_id in array_2_gene: sample_hit_gene_dict = self.list_2_dict(array_2_gene[array_id]) for gene_symbol, value in gene_2_array.items(): hit_flag = 0 if gene_symbol in sample_hit_gene_dict: hit_flag = 1 # print(array_id, gene_symbol) else: hit_flag = 0 gene_cnv_dict[gene_symbol] = hit_flag result_ary.append(gene_cnv_dict) hit_count += 1 else: no_hit_count += 1 total_count += 1 gene_df = pd.DataFrame.from_dict(result_ary) print("Total_count = {}, hit_count = {}, no_hit_count = {}.".format( total_count, hit_count, no_hit_count)) # probe_id_list = list(gene_df.columns.values) # print(probe_id_list) # print(gene_df) # print(len(cnv_df)) # print(cnv_df[array_id_key]) print("Out build_cnv_to_gene ...") return gene_df def get_cnv_gainloss_to_gene_table(self, input_file, array_index=3, gain_loss_index=4, gene_index=8): """ Function: 將array_id和gene_symbol gain/loss的mapping file整理成Dict. 回傳兩種型態. array_2_gene和gene_2_array Input: input_file: 為array_id對應gene_symbol的summary file. array_index: column number of array_id. gainloss_index: indec of gain/loss field. gene_index: column number fo gene. 
""" fh_input = open(input_file, 'r') fh_csv = csv.reader(fh_input, delimiter='\t') ## header string # # row = next(fh_csv) # # features_ary = np.array(row) array_2_gene = {} array_id = '' gene_symbol = '' gene_2_array = {} for row in fh_csv: # print(row) array_id = row[array_index] gene_symbol = row[gene_index] gain_loss_int = row[gain_loss_index] gene_symbol_with_gain_loss = '' if gain_loss_int == 1: gene_symbol_with_gain_loss = str(gene_symbol) + "_Gain" else: gene_symbol_with_gain_loss = str(gene_symbol) + "_Loss" if not array_id in array_2_gene: array_2_gene[array_id] = [] array_2_gene[array_id].append(gene_symbol_with_gain_loss) if not gene_symbol_with_gain_loss in gene_2_array: gene_2_array[gene_symbol_with_gain_loss] = [] gene_2_array[gene_symbol_with_gain_loss].append(array_id) fh_input.close() ### remove redundant data. refine_array_2_gene = {} for array_id, gene_ary in array_2_gene.items(): gene_ary = list(set(gene_ary)) refine_array_2_gene[array_id] = gene_ary refine_gene_2_array = {} for gene_symbol, array_id_ary in gene_2_array.items(): array_id_ary = list(set(array_id_ary)) refine_gene_2_array[gene_symbol] = array_id_ary return refine_array_2_gene, refine_gene_2_array def build_region_40_summary_with_gainloss_file(self, input_path, summary_file): ''' 整理region40的結果,包含gain/loss資訊(1/-1). 
輸出格式為: chr,start,end,gpr_id,gain/loss ''' fh_output = open(summary_file, 'w') fh_output.write('chr\tstart\tend\tarray_id\tgain_loss\n') feature_ary = ['Chromosome', 'Start', 'End', 'Gain_loss'] region_40_dict = self.read_region_40_file_pipeline(input_path) cnv_count = 0 normal_count = 0 for gpr_id, data_df in region_40_dict.items(): if not data_df is None: ary_len = len(data_df) feature_list = data_df.loc[:, feature_ary].values print(gpr_id, ary_len) # print(feature_list) for temp_chr, temp_start, temp_end, temp_gain_loss in feature_list: temp_gain_loss_int = 0 temp_gain_loss = str(temp_gain_loss) if temp_gain_loss == "Loss": temp_gain_loss_int = -1 elif temp_gain_loss == "Gain": temp_gain_loss_int = 1 else: temp_gain_loss_int = 0 temp_start = re.sub(r'[,]', '', temp_start) temp_end = re.sub(r'[,]', '', temp_end) if temp_chr == 23: temp_chr = 'X' elif temp_chr == 24: temp_chr = 'Y' else: pass print( temp_chr, temp_start, temp_end, temp_gain_loss, temp_gain_loss_int, ) fh_output.write('chr{}\t{}\t{}\t{}\t{}\n'.format( temp_chr, temp_start, temp_end, gpr_id, temp_gain_loss_int)) # print(data_df) cnv_count += 1 else: print("Normal. {} .....".format(gpr_id)) normal_count += 1 print("cnv_count = {}, normal_count = {}.".format( cnv_count, normal_count)) print("Output file = {}".format(summary_file)) fh_output.close() def build_region_40_summary_file(self, input_path, summary_file): ''' 整理region40的結果. 
輸出格式為: chr,start,end,gpr_id ''' fh_output = open(summary_file, 'w') fh_output.write('chr\tstart\tend\tarray_id\n') feature_ary = ['Chromosome', 'Start', 'End'] region_40_dict = self.read_region_40_file_pipeline(input_path) cnv_count = 0 normal_count = 0 for gpr_id, data_df in region_40_dict.items(): if not data_df is None: ary_len = len(data_df) feature_list = data_df.loc[:, feature_ary].values print(gpr_id, ary_len) # print(feature_list) for temp_chr, temp_start, temp_end in feature_list: temp_start = re.sub(r'[,]', '', temp_start) temp_end = re.sub(r'[,]', '', temp_end) if temp_chr == 23: temp_chr = 'X' elif temp_chr == 24: temp_chr = 'Y' else: pass print(temp_chr, temp_start, temp_end) fh_output.write('chr{}\t{}\t{}\t{}\n'.format( temp_chr, temp_start, temp_end, gpr_id)) # print(data_df) cnv_count += 1 else: print("Normal. {} .....".format(gpr_id)) normal_count += 1 print("cnv_count = {}, normal_count = {}.".format( cnv_count, normal_count)) print("Output file = {}".format(summary_file)) fh_output.close() def list_2_dict(self, input_list): result_dict = dict((tmp_key, tmp_key) for tmp_key in input_list) return result_dict def build_array_to_gene(self, cnv_df, array_2_gene, gene_2_array, array_id_key='Array_ID'): ''' Function: r analysis cnv result to gene symbol. 
''' print("In build_cnv_to_gene ...") # probe_id_list = list(cnv_df.columns.values) cnv_ary = cnv_df.to_dict(orient='records') # for temp_index,row in cnv_df.iterrows(): result_ary = [] for cnv_dict in cnv_ary: # print(cnv_dict) gene_cnv_dict = {} array_id = cnv_dict[array_id_key] gene_cnv_dict[array_id_key] = array_id sample_hit_gene_dict = {} ### sample with cnv if array_id in array_2_gene: sample_hit_gene_dict = self.list_2_dict(array_2_gene[array_id]) else: pass for gene_symbol, value in gene_2_array.items(): hit_flag = 0 if gene_symbol in sample_hit_gene_dict: hit_flag = 1 print(array_id, gene_symbol) else: hit_flag = 0 gene_cnv_dict[gene_symbol] = hit_flag result_ary.append(gene_cnv_dict) gene_df = pd.DataFrame.from_dict(result_ary) # probe_id_list = list(gene_df.columns.values) # print(probe_id_list) # print(gene_df) # print(len(cnv_df)) # print(cnv_df[array_id_key]) print("Out build_cnv_to_gene ...") return gene_df def get_cnv_to_gene_table(self, input_file, array_index=3, gene_index=7): """ Function: 將array_id和gene_symbol的mapping file整理成Dict. 回傳兩種型態. array_2_gene和gene_2_array Input: input_file: 為array_id對應gene_symbol的summary file. array_index: column number of array_id. gene_index: column number fo gene. """ fh_input = open(input_file, 'r') fh_csv = csv.reader(fh_input, delimiter='\t') ## header string # # row = next(fh_csv) # # features_ary = np.array(row) array_2_gene = {} array_id = '' gene_symbol = '' gene_2_array = {} for row in fh_csv: # print(row) array_id = row[array_index] gene_symbol = row[gene_index] if not array_id in array_2_gene: array_2_gene[array_id] = [] array_2_gene[array_id].append(gene_symbol) if not gene_symbol in gene_2_array: gene_2_array[gene_symbol] = [] gene_2_array[gene_symbol].append(array_id) fh_input.close() ### remove redundant data. 
refine_array_2_gene = {} for array_id, gene_ary in array_2_gene.items(): gene_ary = list(set(gene_ary)) refine_array_2_gene[array_id] = gene_ary refine_gene_2_array = {} for gene_symbol, array_id_ary in gene_2_array.items(): array_id_ary = list(set(array_id_ary)) refine_gene_2_array[gene_symbol] = array_id_ary return refine_array_2_gene, refine_gene_2_array def build_cnv_to_gene(self, cnv_df, gene_2_probe, array_id_key='Array_ID'): print("In build_cnv_to_gene ...") # probe_id_list = list(cnv_df.columns.values) cnv_ary = cnv_df.to_dict(orient='records') # for temp_index,row in cnv_df.iterrows(): result_ary = [] for cnv_dict in cnv_ary: # print(cnv_dict) gene_cnv_dict = {} gene_cnv_dict[array_id_key] = cnv_dict[array_id_key] for gene_symbol, value in gene_2_probe.items(): temp_sum = 0.0 temp_count = 0 for probe_id in value: if probe_id in cnv_dict: try: temp_sum += float(cnv_dict[probe_id]) temp_count += 1 except: temp_sum += 0.0 if temp_count == 0: temp_count = 1 temp_sum = temp_sum / temp_count gene_cnv_dict[gene_symbol] = temp_sum result_ary.append(gene_cnv_dict) gene_df = pd.DataFrame.from_dict(result_ary) # probe_id_list = list(gene_df.columns.values) # print(probe_id_list) # print(gene_df) # print(len(cnv_df)) # print(cnv_df[array_id_key]) print("Out build_cnv_to_gene ...") return gene_df def get_probe_to_gene_table(self, input_file, probe_index=3, gene_index=7): """ Input: input_file: csv file. probe_index: column number of probe. gene_index: column number fo gene. 
""" fh_input = open(input_file, 'r') fh_csv = csv.reader(fh_input, delimiter='\t') ## header string # # row = next(fh_csv) # # features_ary = np.array(row) probe_2_gene = {} probe_id = '' gene_symbol = '' gene_2_probe = {} for row in fh_csv: # print(row) probe_id = row[probe_index] gene_symbol = row[gene_index] probe_2_gene[probe_id] = gene_symbol if not gene_symbol in gene_2_probe: gene_2_probe[gene_symbol] = [] gene_2_probe[gene_symbol].append(probe_id) fh_input.close() return probe_2_gene, gene_2_probe def combine_outcome_data(self, cnv_df, outcome_dict, combine_column='Array_ID'): ''' ''' outcome_df = pd.DataFrame.from_dict(outcome_dict, orient='index') outcome_df.columns = ['cnv_outcome'] outcome_df[combine_column] = outcome_df.index merge_df = pd.merge(cnv_df, outcome_df) # print(merge_df) return merge_df def cnv_data_reader_pipeline(self, input_path): ''' Function: 資料來源為CNV結果,(R分析之後的結果) 產生以probe_id為column的data frame. value為log2 ration. ''' probe_ary = [] ### 紀錄probe_id的聯集 columns_dict = {'Array_ID': 'Array_ID'} cnv_all_df = None file_list = self.file_obj.get_all_probe_bind_file(input_path) gpr_code = '' file_count = len(file_list) temp_count = 0 for temp_file in file_list: gpr_code = self.file_obj.get_gpr_code_from_path(temp_file) ## probe_info_dict ={ probe_id:log2, ...} probe_info_dict = self.region_40_file_reader_to_dict( temp_file, gpr_code[0], columns_dict) probe_ary.append(probe_info_dict) temp_count += 1 print("%s/%s, File = %s" % (temp_count, file_count, temp_file)) print("gpr_code = %s" % (gpr_code[0])) print("columns_dict len = %s" % (len(columns_dict))) # ### debug info # if temp_count ==20: # break data_dict = {} log2_value = 0 for probe_info_dict in probe_ary: for temp_key in columns_dict.keys(): if temp_key in probe_info_dict: log2_value = probe_info_dict[temp_key] else: log2_value = 0 if not temp_key in data_dict: data_dict[temp_key] = [] data_dict[temp_key].append(log2_value) cnv_all_df = pd.DataFrame.from_dict(data_dict) # print(cnv_all_df) 
# print(cnv_all_df.info()) return cnv_all_df def cnv_data_reader(self, input_file, array_id, colunm_tag='ID', value_tag='log2'): data_df = self.region_40_file_reader(input_file) # data_frame_t = data_df.set_index(colunm_tag).T ### get certain row as value serial_log2 = data_df.loc[:, [colunm_tag, value_tag]] ### set array_id as index and transpose as columns serial_log2 = serial_log2.set_index(colunm_tag).T ### assign array_id to index label serial_log2 = serial_log2.rename(index={value_tag: array_id}) ### adding array_id column serial_log2['Array_ID'] = serial_log2.index return serial_log2 def cnv_data_reader_pipeline_bak(self, input_path): ''' ''' cnv_all_df = None file_list = self.file_obj.get_all_probe_bind_file(input_path) gpr_code = '' file_count = len(file_list) temp_count = 0 for temp_file in file_list: gpr_code = self.file_obj.get_gpr_code_from_path(temp_file) cnv_df = self.cnv_data_reader(temp_file, gpr_code[0]) if temp_count > 0: cnv_all_df = cnv_all_df.append(cnv_df.copy()) else: cnv_all_df = cnv_df.copy() temp_count += 1 # print(type(cnv_all_df)) print("%s/%s, File = %s" % (temp_count, file_count, temp_file)) print("gpr_code = %s" % (gpr_code[0])) return cnv_all_df def cnv_data_reader_bak(self, input_file, array_id, colunm_tag='ID', value_tag='log2'): data_df = self.region_40_file_reader(input_file) ### set array_id as index and transpose as columns data_frame_t = data_df.set_index(colunm_tag).T ### get certain row as value serial_log2 = data_frame_t.loc[value_tag, :] ## convert serials to data_frame data_frame_t_log2 = serial_log2.to_frame() data_frame_t_log2 = data_frame_t_log2.T ### assign array_id to index label data_frame_t_log2 = data_frame_t_log2.rename( index={value_tag: array_id}) ### adding array_id column data_frame_t_log2['Array_ID'] = data_frame_t_log2.index # print(type(data_frame_t_log2)) return data_frame_t_log2 def read_all_probe_bind_pipeline(self, input_path): ''' Output: result_dict: {} key: gpr_id, value: cnv_df, pandas dataframe 
''' file_list = self.file_obj.get_all_probe_bind_file(input_path) gpr_code = '' result_dict = {} file_count = len(file_list) temp_count = 0 for temp_file in file_list: print("%s/%s, File = %s" % (temp_count, file_count, temp_file)) cnv_df = None gpr_code = self.file_obj.get_gpr_code_from_path(temp_file) cnv_df = self.region_40_file_reader(temp_file) result_dict[gpr_code[0]] = cnv_df print("gpr_code = %s" % (gpr_code)) temp_count += 1 # result_df = self.pd_read_csv_data(input_file) return result_dict def read_region_40_file_pipeline(self, input_path): ''' Output: result_dict: {} key: gpr_id, value: cnv_df, pandas dataframe ''' region_40_list = self.file_obj.get_region_40_file(input_path) gpr_code = '' result_dict = {} for temp_file in region_40_list: print("File = %s" % (temp_file)) cnv_df = None gpr_code = self.file_obj.get_gpr_code_from_path(temp_file) cnv_df = self.region_40_file_reader(temp_file) result_dict[gpr_code[0]] = cnv_df print("gpr_code = %s" % (gpr_code)) # result_df = self.pd_read_csv_data(input_file) return result_dict def region_40_file_reader_to_dict(self, input_file, array_id, record_dict): ''' Function: 讀取region_40/All_probe_bind的結果,紀錄probe_id和對應的log2_ratio。 根據header來判斷檔案內是否有資料。 Input: input_file: tab format input file. record_dict: 記錄所有probe_id. Output: pandas data frame. ''' probe_id_index = 0 log2_index = 5 result_df = None fh_input = open(input_file, 'r', encoding="latin1") header_str = next(fh_input) temp_ary = header_str.split('\t') probe_dict = {} probe_dict['Array_ID'] = array_id if len(temp_ary) > 1: for temp_str in fh_input: temp_ary = temp_str.split('\t') probe_id = temp_ary[probe_id_index] log2_value = temp_ary[log2_index] probe_dict[probe_id] = log2_value record_dict[probe_id] = probe_id fh_input.close() return probe_dict def region_40_file_reader(self, input_file): ''' Function: 根據header來判斷檔案內是否有資料。 Input: tab format input file. Output: pandas data frame. 
''' result_df = None fh_input = open(input_file, 'r') header_str = next(fh_input) temp_ary = header_str.split('\t') fh_input.close() if len(temp_ary) > 1: result_df = self.pd_read_csv_data(input_file) return result_df def read_blast_count_data(self, sys_obj, input_dir): accessnum_index = sys_obj.get_accessnum_index() blast_count_index = sys_obj.get_blast_count_index() blast_count_df = self.pd_read_csv_data_from_dir(input_dir) blast_value_temp = blast_count_df.copy() blast_value_temp.iloc[:, accessnum_index] = blast_value_temp.iloc[:, accessnum_index].str.replace( ",", "" ).str.replace( ".", "") blast_value_temp = blast_value_temp.iloc[:, [ accessnum_index, blast_count_index ]] blast_count_refine = blast_value_temp blast_count_refine.columns = ["Specimen_ID", "Blast_count"] # blast_count_refine = blast_value_temp.rename(columns={"Accessnum":"Specimen ID"}) # print(blast_count_refine) # print("Total blast_count_refine = {0} .".format(len(blast_count_refine))) return blast_count_refine def pd_read_csv_data_from_dir(self, input_dir, extension_str='.csv'): ''' Input: input_dir: '/input/dir' Output: feature_ary = np.array data_x = np.array data_y = np.array ''' file_obj = FileList() file_ary = file_obj.find_file(input_dir, extension_str) df_list = [] for temp_file in file_ary: temp_df = self.pd_read_csv_data(temp_file) df_list.append(temp_df) result_df = pd.concat(df_list, axis=0) ## reset all index result_df = result_df.set_index(np.arange(result_df.shape[0])) return result_df def pd_read_csv_data(self, input_file): data_frame = pd.read_csv(input_file, delimiter='\t', encoding="latin1") # print(data_frame) return data_frame def read_csv_data(self, input_file, outcome_index=26): ''' feature_ary = np.array (header) data_x = np.array data_y = np.array ''' fh_input = open(input_file, 'r') # csv_input = csv.reader.(fh_input, delimiter=',', quotechar='"') fh_csv = csv.reader(fh_input, delimiter=',') ## header string row = next(fh_csv) features_ary = np.array(row) data_x = [] 
data_y = [] for row in fh_csv: data_x.append(row) data_y.append(row[outcome_index]) data_x = np.array(data_x) data_y = np.array(data_y) fh_input.close() return features_ary, data_x, data_y def get_csv_data_from_dir(self, input_dir, outcome_index=26, extension_str='.csv'): ''' Input: input_dir: '/input/dir' Output: feature_ary = np.array data_x = np.array data_y = np.array ''' file_obj = FileList() file_ary = file_obj.find_file(input_dir, extension_str) data_x = [] data_y = [] data_x = np.array(data_x) data_y = np.array(data_y) for temp_index, temp_file in enumerate(file_ary): features_ary, temp_data_x, temp_data_y = self.read_csv_data( temp_file, outcome_index) if temp_index == 0: data_x = temp_data_x data_y = temp_data_y else: data_x = np.concatenate((data_x, temp_data_x), axis=0) data_y = np.concatenate((data_y, temp_data_y), axis=0) print(temp_file) print(len(temp_data_x)) print(len(data_x)) return features_ary, data_x, data_y def pd_read_txt(self, data_dir, file_path): obj = pd.read_csv(data_dir + file_path, sep='\t', header=None, names=['Array_ID', 'tif_path']) return obj def tif_ary_reader(self, data_df, path_column, y_label): records_num = len(data_df.index) print('How many records:', records_num) data_ary = [] for i in range(records_num): path = data_df.loc[i, path_column] y = data_df.loc[i, y_label] im = Image.open(path) img_ary = np.array(im) # scaling the input img_ary = np.divide(img_ary, 255).astype('uint8') # uint16 -> uint8 # img_ary = img_ary.astype('uint8') if img_ary.shape[0] > img_ary.shape[1]: print('*** Img transpose ***:') print(i) print('Img shape origin:') print(img_ary.shape) img_ary = np.transpose(img_ary) print('After transpose:') print(img_ary.shape) ## 為了以image augmentation 解決 imbalance,旋轉放在後面流程處理,寫在 data_processing裡面 resized_img_ary = cv2.resize( img_ary, (1220, 432)) #(610, 216) cv2.resize 跟 array.shape 的顯示是顛倒的 ... 
# M = cv2.getRotationMatrix2D((img_px_size/2, img_px_size/2), randint(-10, 10), 1) # rotated_img_ary = cv2.warpAffine(resized_img_ary, M, (img_px_size, img_px_size)) im.close() data_ary.append([resized_img_ary, y]) return data_ary def png_ary_reader(self, data_df, path_column, y_label): records_num = len(data_df.index) print('How many records:', records_num) data_ary = [] crop_box = (60, 100, 2369, 800) # 左右: 切到邊框 上下: 切到 +/- 3 for i in range(records_num): print('Fig ', i, ' processing... \n') path = data_df.loc[i, path_column] y = data_df.loc[i, y_label] im = Image.open(path) crop_im = im.crop(crop_box) img_ary = np.array(crop_im) # scaling the input # img_ary = np.divide(img_ary, 255).astype('uint8') # uint16 -> uint8 # img_ary = img_ary.astype('uint8') # if img_ary.shape[0] > img_ary.shape[1]: # print('*** Img transpose ***:') # print(i) # print('Img shape origin:') # print(img_ary.shape) # img_ary = np.transpose(img_ary) # print('After transpose:') # print(img_ary.shape) ## After crop -> 2309 * 800 resized_img_ary = cv2.resize( img_ary, (1150, 400)) #(610, 216) cv2.resize 跟 array.shape 的顯示是顛倒的 ... # M = cv2.getRotationMatrix2D((img_px_size/2, img_px_size/2), randint(-10, 10), 1) # rotated_img_ary = cv2.warpAffine(resized_img_ary, M, (img_px_size, img_px_size)) im.close() data_ary.append([resized_img_ary, y]) return data_ary