def pd_read_csv_data_from_dir(self, input_dir, extension_str='.csv'):
        '''
        Read every matching file found for a directory into one DataFrame.

        Input:
            input_dir: '/input/dir', forwarded to FileList.find_file
            extension_str: file extension forwarded to FileList.find_file
                (default '.csv')
        Output:
            result_df: pandas DataFrame holding the rows of every file that
                self.pd_read_csv_data loaded, stacked in the order find_file
                returned them and renumbered 0..n-1. An empty DataFrame is
                returned when find_file yields no file.
        '''
        file_obj = FileList()
        file_ary = file_obj.find_file(input_dir, extension_str)

        df_list = [self.pd_read_csv_data(temp_file) for temp_file in file_ary]

        # pd.concat raises "No objects to concatenate" on an empty list;
        # report "nothing found" as an empty frame instead.
        if not df_list:
            return pd.DataFrame()

        # ignore_index=True discards the per-file row labels and renumbers
        # the stacked rows 0..n-1, so no separate index reset is needed.
        return pd.concat(df_list, axis=0, ignore_index=True)
Example #2
0
    def __init__(self, game):
        """Build the menu stage: item constructors, sprites and key bindings.

        game is stored as self.game; its app.config supplies the "PATHS"
        entries, its assets supply the title font and icon titles, and its
        app.input receives the navigation handlers.
        """
        Group.__init__(self)

        self.game = game
        self.need_draw = True

        # __tick accumulates elapsed time; key_test_period is the threshold
        # it is compared against elsewhere in the class.
        self.key_test_period = 0.25
        self.__tick = 0

        self.rom_executor = RomExecutor()
        gamelist_path = game.app.config.get("PATHS", "gamelist")
        self.game_list = RomDataItemsConstructor(gamelist_path)

        sdcard_path = game.app.config.get("PATHS", "sdcard")
        self.sdcard_constructor = DirlistItemConstructor(sdcard_path,
                                                         Executor())

        # Order: favorites, GEN, SMS, NES, SNES, sd card, then an empty slot.
        constructors = [BaseItemConstructor()]
        for console in ("GEN", "SMS", "NES", "SNES"):
            constructors.append(self.game_list.getConsole(console))
        constructors.append(self.sdcard_constructor)
        constructors.append(None)
        self.all_constructors = constructors

        self.item_constructor = self.all_constructors[0]

        self.bg = Background(game)

        self.title_text = TextSprite("", game.assets["TITLE_FONT"])
        first_icon = self.game.assets["ICONS"][0]
        self.update_title_text(first_icon['title'])

        self.title_text.centered = True

        self.platform = MainMenuPlatformList(game)
        self.file_list = FileList(game, self.item_constructor)
        self.file_list.deselect_all()

        self.selector_state = MainStage.SELECTRO_ICONS

        # Navigation keys, registered in this order.
        bindings = (
            (input.Input.EVENT_DOWN, self.nextItem),
            (input.Input.EVENT_UP, self.lastItem),
            (input.Input.EVENT_NEXT, self.select),
            (input.Input.EVENT_BACK, self.selectBack),
            (input.Input.EVENT_LEFT, self.last10Item_list),
            (input.Input.EVENT_RIGHT, self.next10Item_list),
        )
        for event, handler in bindings:
            game.app.input.addEvent(event, handler)

        self.parts = [self.title_text, self.platform, self.file_list]
    def get_csv_data_from_dir(self,
                              input_dir,
                              outcome_index=26,
                              extension_str='.csv'):
        '''
        Load and stack the data of every matching file found for a directory.

        Input:
            input_dir: '/input/dir', forwarded to FileList.find_file
            outcome_index: forwarded unchanged to self.read_csv_data
                (presumably the outcome column position -- confirm against
                read_csv_data)
            extension_str: file extension forwarded to FileList.find_file
        Output:
            features_ary: first value returned by read_csv_data for the last
                file read, or None when no file matched
            data_x: per-file data_x values concatenated along axis 0
            data_y: per-file data_y values concatenated along axis 0
            (data_x and data_y are empty np.array objects when no file
            matched)
        '''
        file_obj = FileList()
        file_ary = file_obj.find_file(input_dir, extension_str)

        # Bound up front so an empty file list returns None instead of
        # raising UnboundLocalError at the return statement.
        features_ary = None
        x_chunks = []
        y_chunks = []
        row_total = 0

        for temp_file in file_ary:
            features_ary, temp_data_x, temp_data_y = self.read_csv_data(
                temp_file, outcome_index)
            x_chunks.append(temp_data_x)
            y_chunks.append(temp_data_y)
            row_total += len(temp_data_x)

            # Progress report: file, rows in this file, rows so far.
            print(temp_file)
            print(len(temp_data_x))
            print(row_total)

        if not x_chunks:
            return features_ary, np.array([]), np.array([])
        if len(x_chunks) == 1:
            # A single file is handed back exactly as read_csv_data gave it.
            return features_ary, x_chunks[0], y_chunks[0]

        # Join once at the end; concatenating inside the loop re-copies all
        # previously read rows for every additional file.
        data_x = np.concatenate(x_chunks, axis=0)
        data_y = np.concatenate(y_chunks, axis=0)
        return features_ary, data_x, data_y
Example #4
0
    def _fill_content(self):
        """Populate main_vbox with a two-pane split: file list and notebook."""
        splitter = gtk.HPaned()
        splitter.set_position(200)
        self.main_vbox.pack_start(splitter, expand=True, fill=True)

        # First pane: the file list inside a scroller whose bars appear
        # only when needed.
        scroller = gtk.ScrolledWindow()
        scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        splitter.pack1(scroller, resize=False)

        self.__file_list = FileList(self.notebook)
        scroller.add(self.__file_list)
        self.__file_list.connect('open-file', self.on_file_list_open_file)

        # Second pane: the notebook widget, which takes the resize space.
        splitter.pack2(self.nb_widget, resize=True)

        self.nb_widget.set_scrollable(True)
    def _fill_content(self):
        """Populate main_vbox with a two-pane split: file list and notebook.

        The divider position comes from self.state and changes are reported
        back through on_hpaned_notify_position.
        """
        splitter = gtk.HPaned()
        saved_position = self.state.get_pane_position()
        # -1 falls back to 200 (presumably "nothing saved yet" -- confirm
        # against the state object).
        splitter.set_position(200 if saved_position == -1 else saved_position)
        splitter.connect('notify::position', self.on_hpaned_notify_position)
        self.main_vbox.pack_start(splitter, expand=True, fill=True)

        # First pane: the file list inside a scroller whose bars appear
        # only when needed.
        scroller = gtk.ScrolledWindow()
        scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        splitter.pack1(scroller, resize=False)

        self.__file_list = FileList(self.notebook)
        scroller.add(self.__file_list)
        file_list_signals = (
            ('open-file', self.on_file_list_open_file),
            ('close-file', self.on_file_list_close_file),
            ('rename-file', self.on_file_list_rename_file),
            ('delete-file', self.on_file_list_delete_file),
        )
        for signal_name, callback in file_list_signals:
            self.__file_list.connect(signal_name, callback)

        # Second pane: the notebook widget, which takes the resize space.
        splitter.pack2(self.nb_widget, resize=True)

        self.nb_widget.set_scrollable(True)
Example #6
0
	def change_path(self, path=None, selected=None):
		"""Change file list path, redirecting non-trash paths to 'trash:///'.

		A path of None or one starting with 'trash://' is passed through
		unchanged to FileList.change_path.
		"""
		outside_trash = path is not None and not path.startswith('trash://')
		target = 'trash:///' if outside_trash else path

		FileList.change_path(self, target, selected)
def gpr_file_test():
    '''
    Cross-check the 'gpr' files found on disk against the ids in the Excel
    summary sheet.

    Writes three text files:
        gpr_file_list.txt: every file path FileList.find_file returned
        match_file_list.txt: "<id>\\t<path>" for each Excel id that also
            appears among the ids taken from the files
        missing_file_list.txt: each Excel id with no matching file id
    and prints the missing ids plus hit / miss / total counts.
    '''
    file_path_ary = [
        '/home/ryan/smb_data/CytoOneArray/RD/完成報告',
        '/home/ryan/smb_data/CytoOneArray/RD/審查中報告',
        '/home/ryan/smb_data/brank_data/For Brank/GPR'
    ]
    file_ext = 'gpr'
    gpr_file_list = '/home/ryan/src_dir/CytoOA_AI/data/gpr_file_list.txt'
    missing_file_list = '/home/ryan/src_dir/CytoOA_AI/data/missing_file_list.txt'
    match_file_list = '/home/ryan/src_dir/CytoOA_AI/data/match_file_list.txt'
    input_file = '/home/ryan/src_dir/CytoOA_AI/data/Cyto_Report_summary2.xlsx'

    file_obj = FileList()
    file_ary = []
    for file_path in file_path_ary:
        file_ary += file_obj.find_file(file_path, file_ext)

    # All three outputs are opened (and truncated) together, as before, but
    # the with-statement guarantees they are closed even if a later step
    # raises.
    with open(gpr_file_list, 'w') as fh_writer, \
            open(missing_file_list, 'w') as fh_missing, \
            open(match_file_list, 'w') as fh_match:
        all_id = []
        array_id_2_path_dict = {}

        for temp_file in file_ary:
            fh_writer.write(temp_file + "\n")
            file_name = file_obj.get_gpr_code(temp_file)

            # Record array id -> file path (first id of each file only).
            array_id_2_path_dict[file_name[0]] = temp_file
            all_id += file_name

        gpr_id_dict = list_2_dict(all_id)

        excel_reader = ExcelReader()
        excel_df = excel_reader.read_excel(input_file)
        excel_gpr_id = get_gpr_id_from_excel_df(excel_df)
        excel_grp_dict = list_2_dict(excel_gpr_id)

        hit_count = 0
        miss_count = 0
        total_count = 0
        miss_id = []
        for key, _unused in excel_grp_dict.items():
            if key in gpr_id_dict:
                hit_count += 1
                # NOTE(review): gpr_id_dict is built from every id that
                # get_gpr_code returns, while array_id_2_path_dict only
                # holds the first id per file. If get_gpr_code can return
                # more than one id this lookup can raise KeyError -- confirm.
                fh_match.write(
                    str(key) + "\t" + array_id_2_path_dict[key] + "\n")
            else:
                miss_count += 1
                miss_id.append(key)
                fh_missing.write(str(key) + "\n")
            total_count += 1

    print("Missing id = ")
    print(miss_id)

    print("Hit count = {}".format(hit_count))
    print("Miss_count = {}".format(miss_count))
    print("Total_count = {}".format(total_count))
Example #8
0
class MainStage(Group):
    """Main menu stage: a platform selector plus a file list for that platform.

    selector_state records which of the two currently receives navigation:
    SELECTRO_ICONS (platform selector) or SELECTOR_LIST (file list).
    """

    app = None
    SELECTRO_ICONS = "icons"
    SELECTOR_LIST = "list"

    def __init__(self, game):
        """Build item constructors, sprites and key bindings for `game`."""
        Group.__init__(self)

        self.game = game
        self.need_draw = True

        # __tick accumulates elapsed time in update(); a held key is
        # re-applied each time it reaches key_test_period.
        self.key_test_period = 0.25
        self.__tick = 0

        self.rom_executor = RomExecutor()
        gamelist_path = game.app.config.get("PATHS", "gamelist")
        self.game_list = RomDataItemsConstructor(gamelist_path)

        sdcard_path = game.app.config.get("PATHS", "sdcard")
        self.sdcard_constructor = DirlistItemConstructor(sdcard_path,
                                                         Executor())

        # Indexed by platform.selected in lastNextItem().
        # Order: favorites, GEN, SMS, NES, SNES, sd card, then an empty slot.
        constructors = [BaseItemConstructor()]
        for console in ("GEN", "SMS", "NES", "SNES"):
            constructors.append(self.game_list.getConsole(console))
        constructors.append(self.sdcard_constructor)
        constructors.append(None)
        self.all_constructors = constructors

        self.item_constructor = self.all_constructors[0]

        self.bg = Background(game)

        self.title_text = TextSprite("", game.assets["TITLE_FONT"])
        first_icon = self.game.assets["ICONS"][0]
        self.update_title_text(first_icon['title'])

        self.title_text.centered = True

        self.platform = MainMenuPlatformList(game)
        self.file_list = FileList(game, self.item_constructor)
        self.file_list.deselect_all()

        self.selector_state = MainStage.SELECTRO_ICONS

        # Navigation keys, registered in this order.
        bindings = (
            (input.Input.EVENT_DOWN, self.nextItem),
            (input.Input.EVENT_UP, self.lastItem),
            (input.Input.EVENT_NEXT, self.select),
            (input.Input.EVENT_BACK, self.selectBack),
            (input.Input.EVENT_LEFT, self.last10Item_list),
            (input.Input.EVENT_RIGHT, self.next10Item_list),
        )
        for event, handler in bindings:
            game.app.input.addEvent(event, handler)

        # Redrawn individually by draw().
        self.parts = [self.title_text, self.platform, self.file_list]

    def next10Item_list(self):
        """Move the list selection to the first item of the following page."""
        if self.selector_state != MainStage.SELECTOR_LIST:
            return
        current_page = (self.file_list.selected //
                        self.file_list.ITEMS_PER_PAGE)
        self.file_list.selected = (
            (current_page + 1) * self.file_list.ITEMS_PER_PAGE)

    def last10Item_list(self):
        """Move the list selection to the first item of the preceding page."""
        if self.selector_state != MainStage.SELECTOR_LIST:
            return
        current_page = (self.file_list.selected //
                        self.file_list.ITEMS_PER_PAGE)
        self.file_list.selected = (
            (current_page - 1) * self.file_list.ITEMS_PER_PAGE)

    def nextItem(self):
        """Step the active selector forward by one."""
        self.lastNextItem(1)

    def lastItem(self):
        """Step the active selector back by one."""
        self.lastNextItem(-1)

    def lastNextItem(self, dir):
        """Move the active selector by `dir` and reset the key-repeat timer.

        In list mode only the file list selection moves. In icon mode the
        platform changes, and the title and file list are refreshed to match.
        """
        self.__tick = 0

        if self.selector_state != MainStage.SELECTRO_ICONS:
            self.file_list.selected += dir
            return

        self.platform.selected += dir
        icon = self.game.assets["ICONS"][self.platform.selected]
        self.update_title_text(icon['title'])

        self.item_constructor = self.all_constructors[self.platform.selected]
        is_not_dirlist = not isinstance(self.item_constructor,
                                        DirlistItemConstructor)
        self.file_list.set_items(self.item_constructor, is_not_dirlist)
        self.file_list.deselect_all()

    def select(self):
        """Confirm: enter the list from icon mode, or activate a list item."""
        if self.selector_state == MainStage.SELECTRO_ICONS:
            self.selector_state = MainStage.SELECTOR_LIST
            self.file_list.selected = 0
        elif self.selector_state == MainStage.SELECTOR_LIST:
            chosen = self.file_list.selected
            if isinstance(self.item_constructor, DirlistItemConstructor):
                # Directory listing: refresh only when next() reports success.
                if self.item_constructor.next(chosen):
                    self.file_list.set_items(self.item_constructor)
            else:
                rom = self.item_constructor.all[chosen]
                print(self.rom_executor.exec(rom))

    def selectBack(self):
        """Leave list mode and return to the platform selector."""
        if self.selector_state != MainStage.SELECTOR_LIST:
            return
        self.selector_state = MainStage.SELECTRO_ICONS
        self.file_list.deselect_all()

    def update_title_text(self, text):
        """Set the title text and re-position it around the point (455, 70)."""
        self.title_text.set_text(text)
        left = 455 - self.title_text.rect.w / 2
        top = 70 - self.title_text.rect.h / 2
        self.title_text.pos = [left, top]

    def update(self, dt):
        """Advance the stage by `dt` and repeat a held up/down key."""
        Group.update(self, dt)
        self.platform.update(dt)

        self.__tick += dt
        if self.__tick < self.key_test_period:
            return

        self.__tick = 0
        if self.game.app.input.keys[input.Input.EVENT_UP]:
            self.lastItem()
        if self.game.app.input.keys[input.Input.EVENT_DOWN]:
            self.nextItem()

    def draw(self, renderer):
        """Redraw whatever needs it; return True if any part was redrawn.

        The background is drawn in full only while need_draw is set. For each
        part flagged need_draw, the background under its last rect(s) is
        restored before the part draws itself.
        """
        if self.need_draw:
            self.bg.draw(renderer)
            self.need_draw = False

        any_redrawn = False
        for part in self.parts:
            if not part.need_draw:
                continue

            # last_rect may be a single rect or a list of rects.
            previous = part.last_rect
            dirty_rects = previous if isinstance(previous, list) else [previous]
            for rect in dirty_rects:
                renderer.blit(self.bg.image, rect, rect)

            part.draw(renderer)
            any_redrawn = True

        return any_redrawn
class NotebookWindow(BaseNotebookWindow):
    """Notebook window with a file list pane beside a tabbed editor area.

    UI_STRING is the menu/toolbar layout (presumably consumed by
    BaseNotebookWindow -- it is not referenced inside this class).
    """

    UI_STRING="""
<ui>
   <menubar name="TopMenu">
      <menu action="file">
         <menuitem action="new-notebook"/>
         <menuitem action="open-notebook"/>
         <menuitem action="notebook-properties"/>
         <separator/>
         <menuitem action="new-worksheet"/>
         <menuitem action="new-library"/>
         <menuitem action="open"/>
         <menuitem action="save"/>
         <menuitem action="rename"/>
         <separator/>
         <menuitem action="page-setup"/>
         <menuitem action="print"/>
         <menuitem action="export-to-pdf"/>
         <separator/>
         <menuitem action="close"/>
         <menuitem action="quit"/>
      </menu>
      <menu action="edit">
         <menuitem action="cut"/>
         <menuitem action="copy"/>
         <menuitem action="copy-as-doctests"/>
         <menuitem action="paste"/>
         <menuitem action="delete"/>
         <separator/>
         <menuitem action="calculate"/>
         <menuitem action="calculate-to-line"/>
         <menuitem action="break"/>
         <separator/>
         <menuitem action="calculate-all"/>
         <separator/>
         <menuitem action="preferences"/>
      </menu>
      <menu action="help">
        <menuitem action="online-documentation"/>
        <separator/>
        <menuitem action="about"/>
      </menu>
   </menubar>
   <toolbar name="ToolBar">
      <toolitem action="save"/>
      <separator/>
      <toolitem action="calculate"/>
      <toolitem action="break"/>
   </toolbar>
   <accelerator action="switch-tab-1"/>
   <accelerator action="switch-tab-2"/>
   <accelerator action="switch-tab-3"/>
   <accelerator action="switch-tab-4"/>
   <accelerator action="switch-tab-5"/>
   <accelerator action="switch-tab-6"/>
   <accelerator action="switch-tab-7"/>
   <accelerator action="switch-tab-8"/>
   <accelerator action="switch-tab-9"/>
   <accelerator action="switch-tab-10"/>
</ui>
"""
    def __init__(self, notebook):
        """Initialise the base window for `notebook` and size it 800x800."""
        BaseNotebookWindow.__init__(self, notebook)

        self.window.set_default_size(800, 800)

    #######################################################
    # Overrides
    #######################################################

    def _fill_content(self):
        """Populate main_vbox with a two-pane split: file list and notebook.

        The divider position comes from self.state and changes are reported
        back through on_hpaned_notify_position.
        """
        hpaned = gtk.HPaned()
        position = self.state.get_pane_position()
        # -1 falls back to 200 (presumably "nothing saved yet" -- confirm
        # against the state object).
        if position == -1:
            hpaned.set_position(200)
        else:
            hpaned.set_position(position)
        hpaned.connect('notify::position', self.on_hpaned_notify_position)
        self.main_vbox.pack_start(hpaned, expand=True, fill=True)

        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        hpaned.pack1(scrolled_window, resize=False)

        self.__file_list = FileList(self.notebook)
        scrolled_window.add(self.__file_list)
        self.__file_list.connect('open-file', self.on_file_list_open_file)
        self.__file_list.connect('close-file', self.on_file_list_close_file)
        self.__file_list.connect('rename-file', self.on_file_list_rename_file)
        self.__file_list.connect('delete-file', self.on_file_list_delete_file)

        hpaned.pack2(self.nb_widget, resize=True)

        self.nb_widget.set_scrollable(True)

    def _add_editor(self, editor):
        """Add `editor` and give its tab a status icon, title and close button."""
        # Set first since _add_editor() calls _update_editor_title()
        editor._notebook_tab_label = gtk.Label()
        editor._notebook_tab_status = gtk.Image()
        editor._notebook_tab_status.props.icon_size = gtk.ICON_SIZE_MENU
        BaseNotebookWindow._add_editor(self, editor)

        label_widget = gtk.HBox(False, 4)
        label_widget.pack_start(editor._notebook_tab_status, True, True, 0)
        label_widget.pack_start(editor._notebook_tab_label, True, True, 0)
        tab_button = gtk.Button()
        tab_button.set_name('notebook-close-button')
        tab_button.set_relief(gtk.RELIEF_NONE)
        tab_button.props.can_focus = False
        tab_button.connect('clicked', lambda *args: self.on_tab_close_button_clicked(editor))
        label_widget.pack_start(tab_button, False, False, 0)
        close = gtk.image_new_from_stock('gtk-close', gtk.ICON_SIZE_MENU)
        tab_button.add(close)
        label_widget.show_all()

        self.nb_widget.set_tab_label(editor.widget, label_widget)

        self.nb_widget.set_tab_reorderable(editor.widget, True)

    def _update_editor_title(self, editor):
        """Mirror the editor's title onto its tab label."""
        BaseNotebookWindow._update_editor_title(self, editor)
        editor._notebook_tab_label.set_text(editor.title)

    def _update_editor_state(self, editor):
        """Mirror the editor's state onto its tab status icon."""
        BaseNotebookWindow._update_editor_state(self, editor)
        editor._notebook_tab_status.props.stock = NotebookFile.stock_id_for_state(editor.state)

    #######################################################
    # Callbacks
    #######################################################

    def on_tab_close_button_clicked(self, editor):
        """Close the editor whose tab close button was clicked."""
        self._close_editor(editor)

    def on_file_list_open_file(self, file_list, file):
        """Open `file` when the file list emits 'open-file'."""
        self.open_file(file)

    def on_file_list_close_file(self, file_list, file):
        """Close every editor showing `file`."""
        # Iterate over a snapshot: _close_editor presumably removes the
        # editor from self.editors, and mutating a list while iterating it
        # skips elements.
        for editor in list(self.editors):
            if editor.file == file:
                self._close_editor(editor)

    def on_file_list_rename_file(self, file_list, file):
        """Rename `file`, via its editor when open, else directly on disk.

        In both cases the renamed file is reselected in `file_list`.
        """
        if file.active:
            # If we have the file open, we need to rename via the editor
            for editor in self.editors:
                if editor.file == file:
                    editor.rename()
                    # Reselect the new item in the list. This must stay
                    # inside the match: run for every editor, it would
                    # leave the last editor's file selected instead of the
                    # renamed one.
                    new_file = self.notebook.file_for_absolute_path(editor.filename)
                    file_list.select_file(new_file)
        else:
            # Otherwise do it directly
            def check_name(name):
                return name != "" and name != file.path

            def do_rename(new_path):
                old_path = os.path.join(self.notebook.folder, file.path)
                os.rename(old_path, new_path)
                self.notebook.refresh()

                # Reselect the new item in the list
                new_file = self.notebook.file_for_absolute_path(new_path)
                file_list.select_file(new_file)

            if isinstance(file, WorksheetFile):
                extension = "rws"
                validate_name = WorksheetEditor.validate_name
            elif isinstance(file, LibraryFile):
                extension = "py"
                validate_name = LibraryEditor.validate_name
            else:
                extension = ""
                validate_name = None

            title = "Rename '%s'" % file.path
            builder = SaveFileBuilder(title, file.path, "Rename", validate_name, check_name)
            builder.dialog.set_transient_for(self.window)
            builder.name_entry.set_text(file.path)

            builder.prompt_for_name(self.notebook.folder, extension, do_rename)
            builder.dialog.destroy()

    def on_file_list_delete_file(self, file_list, file):
        """Ask for confirmation, then close editors on `file` and delete it."""
        dialog = gtk.MessageDialog(parent=self.window, buttons=gtk.BUTTONS_NONE,
                                   type=gtk.MESSAGE_WARNING)
        message = format_escaped("<big><b>Really delete '%s'?</b></big>", file.path)
        dialog.set_markup(message)

        dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                           gtk.STOCK_DELETE, gtk.RESPONSE_OK)
        dialog.set_default_response(gtk.RESPONSE_CANCEL)
        response = dialog.run()
        dialog.destroy()

        if response != gtk.RESPONSE_OK:
            return

        # Snapshot for the same reason as in on_file_list_close_file.
        for editor in list(self.editors):
            if editor.file == file:
                self._close_editor(editor)

        abspath = os.path.join(self.notebook.folder, file.path)
        os.remove(abspath)
        self.notebook.refresh()

    def on_hpaned_notify_position(self, pane, gparamspec):
        """Save the pane divider position into self.state when it changes."""
        self.state.set_pane_position(pane.get_property('position'))
 def __init__(self):
     '''Create a FileList and keep it as self.file_obj.'''
     self.file_obj = FileList()
Example #11
0
 def __init__(self, parent, notebook, options):
     """Initialize by delegating to FileList.__init__ with the same arguments."""
     FileList.__init__(self, parent, notebook, options)
Example #12
0
    def change_path(self, path=None, selected=None):
        """Change file list path, redirecting non-trash paths to 'trash:///'.

        A path of None or one starting with 'trash://' is passed through
        unchanged to FileList.change_path.
        """
        outside_trash = path is not None and not path.startswith('trash://')
        target = 'trash:///' if outside_trash else path

        FileList.change_path(self, target, selected)
Example #13
0
class NotebookWindow(BaseNotebookWindow):
    """Notebook window with a file list pane beside a tabbed editor area.

    UI_STRING is the menu/toolbar layout (presumably consumed by
    BaseNotebookWindow -- it is not referenced inside this class).
    """

    UI_STRING="""
<ui>
   <menubar name="TopMenu">
      <menu action="file">
         <menuitem action="new-notebook"/>
         <menuitem action="open-notebook"/>
         <menuitem action="notebook-properties"/>
         <separator/>
         <menuitem action="new-worksheet"/>
         <menuitem action="new-library"/>
         <menuitem action="open"/>
         <menuitem action="save"/>
         <menuitem action="rename"/>
         <menuitem action="close"/>
         <separator/>
         <menuitem action="quit"/>
      </menu>
      <menu action="edit">
         <menuitem action="cut"/>
         <menuitem action="copy"/>
         <menuitem action="copy-as-doctests"/>
         <menuitem action="paste"/>
         <menuitem action="delete"/>
         <separator/>
         <menuitem action="calculate"/>
         <menuitem action="break"/>
         <separator/>
         <menuitem action="calculate-all"/>
      </menu>
	<menu action="help">
        <menuitem action="about"/>
      </menu>
   </menubar>
   <toolbar name="ToolBar">
      <toolitem action="save"/>
      <separator/>
      <toolitem action="calculate"/>
      <toolitem action="break"/>
   </toolbar>
</ui>
"""
    def __init__(self, notebook):
        """Initialise the base window for `notebook` and size it 800x800."""
        BaseNotebookWindow.__init__(self, notebook)

        self.window.set_default_size(800, 800)

    #######################################################
    # Overrides
    #######################################################

    def _fill_content(self):
        """Populate main_vbox with a two-pane split: file list and notebook."""
        splitter = gtk.HPaned()
        splitter.set_position(200)
        self.main_vbox.pack_start(splitter, expand=True, fill=True)

        # First pane: the file list inside a scroller whose bars appear
        # only when needed.
        scroller = gtk.ScrolledWindow()
        scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
        splitter.pack1(scroller, resize=False)

        self.__file_list = FileList(self.notebook)
        scroller.add(self.__file_list)
        self.__file_list.connect('open-file', self.on_file_list_open_file)

        # Second pane: the notebook widget, which takes the resize space.
        splitter.pack2(self.nb_widget, resize=True)

        self.nb_widget.set_scrollable(True)

    def _add_editor(self, editor):
        """Add `editor` and give its tab a status icon, title and close button."""
        # The tab widgets must exist before the base call, because
        # _add_editor() calls _update_editor_title().
        editor._notebook_tab_label = gtk.Label()
        editor._notebook_tab_status = gtk.Image()
        editor._notebook_tab_status.props.icon_size = gtk.ICON_SIZE_MENU
        BaseNotebookWindow._add_editor(self, editor)

        def close_clicked(*args):
            return self.on_tab_close_button_clicked(editor)

        # Tab contents in packing order: status icon, title, close button.
        tab_box = gtk.HBox(False, 4)
        tab_box.pack_start(editor._notebook_tab_status, True, True, 0)
        tab_box.pack_start(editor._notebook_tab_label, True, True, 0)

        close_button = gtk.Button()
        close_button.set_name('notebook-close-button')
        close_button.set_relief(gtk.RELIEF_NONE)
        close_button.props.can_focus = False
        close_button.connect('clicked', close_clicked)
        tab_box.pack_start(close_button, False, False, 0)

        close_icon = gtk.image_new_from_stock('gtk-close', gtk.ICON_SIZE_MENU)
        close_button.add(close_icon)
        tab_box.show_all()

        page = editor.widget
        self.nb_widget.set_tab_label(page, tab_box)
        self.nb_widget.set_tab_reorderable(page, True)

    def _update_editor_title(self, editor):
        """Mirror the editor's title onto its tab label."""
        BaseNotebookWindow._update_editor_title(self, editor)
        tab_label = editor._notebook_tab_label
        tab_label.set_text(editor.title)

    def _update_editor_state(self, editor):
        """Mirror the editor's state onto its tab status icon."""
        BaseNotebookWindow._update_editor_state(self, editor)
        stock_id = NotebookFile.stock_id_for_state(editor.state)
        editor._notebook_tab_status.props.stock = stock_id

    #######################################################
    # Callbacks
    #######################################################

    def on_tab_close_button_clicked(self, editor):
        """Close the editor whose tab close button was clicked."""
        self._close_editor(editor)

    def on_file_list_open_file(self, file_list, file):
        """Open `file` when the file list emits 'open-file'."""
        self.open_file(file)
Example #14
0
	def __init__(self, parent, notebook, options):
		"""Initialize by delegating to FileList.__init__ with the same arguments."""
		FileList.__init__(self, parent, notebook, options)
Example #15
0
    def change_path(self, path=None, selected=None):
        """Change file list path, redirecting non-trash paths to the root.

        A path of None or one starting with 'trash:' is passed through
        unchanged; any other path is replaced by the provider's
        get_root_path(None) before FileList.change_path is called.
        """
        if path is None or path.startswith('trash:'):
            target = path
        else:
            target = self.get_provider().get_root_path(None)

        FileList.change_path(self, target, selected)
Example #16
0
	def change_path(self, path=None, selected=None):
		"""Change file list path, redirecting non-trash paths to the root.

		A path of None or one starting with 'trash:' is passed through
		unchanged; any other path is replaced by the provider's
		get_root_path(None) before FileList.change_path is called.
		"""
		if path is None or path.startswith('trash:'):
			target = path
		else:
			target = self.get_provider().get_root_path(None)

		FileList.change_path(self, target, selected)
class DataReader(object):
    def __init__(self):
        """Create the FileList helper that the reader pipelines use to locate files."""
        self.file_obj = FileList()
    def build_array_with_cnv_to_gene(self,
                                     cnv_df,
                                     array_2_gene,
                                     gene_2_array,
                                     array_id_key='Array_ID'):
        '''
        Function:
            Build a 0/1 gene-hit table, keeping only the samples that have
            CNVs after the R analysis (i.e. whose array id is a key of
            array_2_gene). Samples without CNVs are counted and dropped.

        Input:
            cnv_df: data frame with one row per sample; must contain the
                array_id_key column.
            array_2_gene: dict, array id -> list of gene symbols hit.
            gene_2_array: dict keyed by gene symbol; only the keys are used
                and they define the gene columns, in iteration order.
            array_id_key: name of the array-id column.
        Output:
            pandas data frame with array_id_key plus one 0/1 column per gene.
        '''
        print("In build_array_with_cnv_to_gene ...")

        result_ary = []
        total_count = 0
        hit_count = 0
        for cnv_dict in cnv_df.to_dict(orient='records'):
            total_count += 1
            array_id = cnv_dict[array_id_key]

            # Samples without any CNV call are skipped entirely.
            if array_id not in array_2_gene:
                continue

            hit_genes = set(array_2_gene[array_id])
            gene_cnv_dict = {array_id_key: array_id}
            for gene_symbol in gene_2_array:
                gene_cnv_dict[gene_symbol] = 1 if gene_symbol in hit_genes else 0
            result_ary.append(gene_cnv_dict)
            hit_count += 1

        no_hit_count = total_count - hit_count
        gene_df = pd.DataFrame(result_ary)

        print("Total_count = {}, hit_count = {}, no_hit_count = {}.".format(
            total_count, hit_count, no_hit_count))

        # The exit trace now names this method (it used to name a sibling).
        print("Out build_array_with_cnv_to_gene ...")

        return gene_df

    def get_cnv_gainloss_to_gene_table(self,
                                       input_file,
                                       array_index=3,
                                       gain_loss_index=4,
                                       gene_index=8):
        """
        Function:
            Organise the array_id / gene_symbol gain-loss mapping file into
            dicts. Each gene symbol is suffixed with "_Gain" when the
            gain/loss field equals 1 and with "_Loss" otherwise.
            Two mappings are returned: array_2_gene and gene_2_array.

        Input:
            input_file: tab-separated summary file mapping array_id to
                gene_symbol (no header row is skipped).
            array_index: column number of array_id.
            gain_loss_index: column number of the gain/loss field.
            gene_index: column number of the gene symbol.
        Output:
            (array_2_gene, gene_2_array): dicts of de-duplicated lists, in
            first-seen order.
        """
        # Dicts are used as insertion-ordered sets so the de-duplicated
        # lists come out in a reproducible order.
        array_2_gene = {}
        gene_2_array = {}
        with open(input_file, 'r', newline='') as fh_input:
            for row in csv.reader(fh_input, delimiter='\t'):
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                array_id = row[array_index]
                gene_symbol = row[gene_index]

                # csv fields are strings, so compare numerically; the old
                # `field == 1` test was never true and labelled all as Loss.
                try:
                    is_gain = float(row[gain_loss_index]) == 1
                except ValueError:
                    is_gain = False

                suffix = "_Gain" if is_gain else "_Loss"
                gene_symbol_with_gain_loss = str(gene_symbol) + suffix

                array_2_gene.setdefault(
                    array_id, {})[gene_symbol_with_gain_loss] = None
                gene_2_array.setdefault(
                    gene_symbol_with_gain_loss, {})[array_id] = None

        refine_array_2_gene = {
            array_id: list(genes) for array_id, genes in array_2_gene.items()
        }
        refine_gene_2_array = {
            gene: list(array_ids) for gene, array_ids in gene_2_array.items()
        }

        return refine_array_2_gene, refine_gene_2_array

    def build_region_40_summary_with_gainloss_file(self, input_path,
                                                   summary_file):
        '''
        Write a tab-separated summary of the region40 results, including
        gain/loss information.

        Output columns: chr, start, end, array_id (the gpr id), gain_loss,
        where gain_loss is 1 for "Gain", -1 for "Loss" and 0 otherwise.

        Input:
            input_path: passed to read_region_40_file_pipeline to load the
                per-array data frames (None means no data for that array).
            summary_file: path of the summary file to (over)write.
        '''
        feature_ary = ['Chromosome', 'Start', 'End', 'Gain_loss']
        gain_loss_code = {"Loss": -1, "Gain": 1}

        cnv_count = 0
        normal_count = 0
        # The context manager closes the output even when a frame is malformed.
        with open(summary_file, 'w') as fh_output:
            fh_output.write('chr\tstart\tend\tarray_id\tgain_loss\n')
            region_40_dict = self.read_region_40_file_pipeline(input_path)

            for gpr_id, data_df in region_40_dict.items():
                if data_df is None:
                    print("Normal.  {} .....".format(gpr_id))
                    normal_count += 1
                    continue

                print(gpr_id, len(data_df))
                feature_list = data_df.loc[:, feature_ary].values
                for temp_chr, temp_start, temp_end, temp_gain_loss in feature_list:
                    temp_gain_loss = str(temp_gain_loss)
                    temp_gain_loss_int = gain_loss_code.get(temp_gain_loss, 0)

                    # str() first: when no value carries a thousands
                    # separator pandas parses the column as integers, and
                    # re.sub would raise TypeError on them.
                    temp_start = str(temp_start).replace(',', '')
                    temp_end = str(temp_end).replace(',', '')

                    # Numeric chromosomes 23/24 are written as X/Y.
                    if temp_chr == 23:
                        temp_chr = 'X'
                    elif temp_chr == 24:
                        temp_chr = 'Y'

                    print(
                        temp_chr,
                        temp_start,
                        temp_end,
                        temp_gain_loss,
                        temp_gain_loss_int,
                    )
                    fh_output.write('chr{}\t{}\t{}\t{}\t{}\n'.format(
                        temp_chr, temp_start, temp_end, gpr_id,
                        temp_gain_loss_int))
                cnv_count += 1

            print("cnv_count = {}, normal_count = {}.".format(
                cnv_count, normal_count))

            print("Output file = {}".format(summary_file))

    def build_region_40_summary_file(self, input_path, summary_file):
        '''
        Write a tab-separated summary of the region40 results.

        Output columns: chr, start, end, array_id (the gpr id).

        Input:
            input_path: passed to read_region_40_file_pipeline to load the
                per-array data frames (None means no data for that array).
            summary_file: path of the summary file to (over)write.
        '''
        feature_ary = ['Chromosome', 'Start', 'End']

        cnv_count = 0
        normal_count = 0
        # The context manager closes the output even when a frame is malformed.
        with open(summary_file, 'w') as fh_output:
            fh_output.write('chr\tstart\tend\tarray_id\n')
            region_40_dict = self.read_region_40_file_pipeline(input_path)

            for gpr_id, data_df in region_40_dict.items():
                if data_df is None:
                    print("Normal.  {} .....".format(gpr_id))
                    normal_count += 1
                    continue

                print(gpr_id, len(data_df))
                feature_list = data_df.loc[:, feature_ary].values
                for temp_chr, temp_start, temp_end in feature_list:
                    # str() first: when no value carries a thousands
                    # separator pandas parses the column as integers, and
                    # re.sub would raise TypeError on them.
                    temp_start = str(temp_start).replace(',', '')
                    temp_end = str(temp_end).replace(',', '')

                    # Numeric chromosomes 23/24 are written as X/Y.
                    if temp_chr == 23:
                        temp_chr = 'X'
                    elif temp_chr == 24:
                        temp_chr = 'Y'

                    print(temp_chr, temp_start, temp_end)
                    fh_output.write('chr{}\t{}\t{}\t{}\n'.format(
                        temp_chr, temp_start, temp_end, gpr_id))
                cnv_count += 1

            print("cnv_count = {}, normal_count = {}.".format(
                cnv_count, normal_count))

            print("Output file = {}".format(summary_file))

    def list_2_dict(self, input_list):
        """Return a dict that maps every element of input_list to itself."""
        return {item: item for item in input_list}

    def build_array_to_gene(self,
                            cnv_df,
                            array_2_gene,
                            gene_2_array,
                            array_id_key='Array_ID'):
        '''
		Function:
			r analysis cnv result to gene symbol.
		'''
        print("In build_cnv_to_gene ...")
        # probe_id_list = list(cnv_df.columns.values)

        cnv_ary = cnv_df.to_dict(orient='records')

        # for temp_index,row in cnv_df.iterrows():
        result_ary = []
        for cnv_dict in cnv_ary:
            # print(cnv_dict)
            gene_cnv_dict = {}
            array_id = cnv_dict[array_id_key]
            gene_cnv_dict[array_id_key] = array_id

            sample_hit_gene_dict = {}
            ### sample with cnv
            if array_id in array_2_gene:
                sample_hit_gene_dict = self.list_2_dict(array_2_gene[array_id])
            else:
                pass

            for gene_symbol, value in gene_2_array.items():
                hit_flag = 0
                if gene_symbol in sample_hit_gene_dict:
                    hit_flag = 1
                    print(array_id, gene_symbol)
                else:
                    hit_flag = 0

                gene_cnv_dict[gene_symbol] = hit_flag
            result_ary.append(gene_cnv_dict)

        gene_df = pd.DataFrame.from_dict(result_ary)

        # probe_id_list = list(gene_df.columns.values)
        # print(probe_id_list)
        # print(gene_df)
        # print(len(cnv_df))
        # print(cnv_df[array_id_key])

        print("Out build_cnv_to_gene ...")

        return gene_df

    def get_cnv_to_gene_table(self, input_file, array_index=3, gene_index=7):
        """
        Function:
            Organise the array_id / gene_symbol mapping file into dicts.
            Two mappings are returned: array_2_gene and gene_2_array.

        Input:
            input_file: tab-separated summary file mapping array_id to
                gene_symbol (no header row is skipped).
            array_index: column number of array_id.
            gene_index: column number of the gene symbol.
        Output:
            (array_2_gene, gene_2_array): dicts of de-duplicated lists, in
            first-seen order.
        """
        # Dicts are used as insertion-ordered sets so the de-duplicated
        # lists come out in a reproducible order.
        array_2_gene = {}
        gene_2_array = {}
        with open(input_file, 'r', newline='') as fh_input:
            for row in csv.reader(fh_input, delimiter='\t'):
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                array_id = row[array_index]
                gene_symbol = row[gene_index]

                array_2_gene.setdefault(array_id, {})[gene_symbol] = None
                gene_2_array.setdefault(gene_symbol, {})[array_id] = None

        refine_array_2_gene = {
            array_id: list(genes) for array_id, genes in array_2_gene.items()
        }
        refine_gene_2_array = {
            gene: list(array_ids) for gene, array_ids in gene_2_array.items()
        }

        return refine_array_2_gene, refine_gene_2_array

    def build_cnv_to_gene(self, cnv_df, gene_2_probe, array_id_key='Array_ID'):
        '''
        Collapse per-probe values into one value per gene for every sample.

        For each row of cnv_df and each gene of gene_2_probe, the gene value
        is the mean of the probe columns that are present in the row and
        convertible to float; it is 0.0 when no probe qualifies.

        Input:
            cnv_df: data frame with one row per sample; must contain the
                array_id_key column and (some of) the probe columns.
            gene_2_probe: dict, gene symbol -> iterable of probe ids.
            array_id_key: name of the array-id column.
        Output:
            pandas data frame with array_id_key plus one column per gene.
        '''
        print("In build_cnv_to_gene ...")

        result_ary = []
        for cnv_dict in cnv_df.to_dict(orient='records'):
            gene_cnv_dict = {array_id_key: cnv_dict[array_id_key]}
            for gene_symbol, probe_ids in gene_2_probe.items():
                values = []
                for probe_id in probe_ids:
                    if probe_id not in cnv_dict:
                        continue
                    try:
                        values.append(float(cnv_dict[probe_id]))
                    except (TypeError, ValueError, OverflowError):
                        # Non-numeric cells are ignored; only conversion
                        # failures are swallowed, not every exception.
                        continue

                gene_cnv_dict[gene_symbol] = (
                    sum(values) / len(values) if values else 0.0)
            result_ary.append(gene_cnv_dict)

        gene_df = pd.DataFrame(result_ary)

        print("Out build_cnv_to_gene ...")

        return gene_df

    def get_probe_to_gene_table(self, input_file, probe_index=3, gene_index=7):
        """
        Build probe<->gene lookup tables from a tab-separated file.

        Input:
            input_file: tab-separated file (no header row is skipped).
            probe_index: column number of the probe id.
            gene_index: column number of the gene symbol.
        Output:
            (probe_2_gene, gene_2_probe): probe id -> gene symbol (the last
            row wins for a repeated probe), and gene symbol -> list of probe
            ids in file order (repeats are kept).
        """
        probe_2_gene = {}
        gene_2_probe = {}
        # The context manager closes the file even if a row is too short.
        with open(input_file, 'r', newline='') as fh_input:
            for row in csv.reader(fh_input, delimiter='\t'):
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                probe_id = row[probe_index]
                gene_symbol = row[gene_index]
                probe_2_gene[probe_id] = gene_symbol
                gene_2_probe.setdefault(gene_symbol, []).append(probe_id)

        return probe_2_gene, gene_2_probe

    def combine_outcome_data(self,
                             cnv_df,
                             outcome_dict,
                             combine_column='Array_ID'):
        '''
        Attach outcomes to cnv_df.

        outcome_dict (key -> outcome) becomes a frame with a 'cnv_outcome'
        column and a combine_column column holding the keys; it is then
        merged with cnv_df using pandas' default merge (inner join on the
        columns both frames share).
        '''
        labelled = pd.DataFrame.from_dict(outcome_dict, orient='index')
        labelled.columns = ['cnv_outcome']
        labelled[combine_column] = labelled.index

        return pd.merge(cnv_df, labelled)

    def cnv_data_reader_pipeline(self, input_path):
        '''
        Function:
            Source data are the CNV results (output of the R analysis).
            Produces a data frame with one row per file and one column per
            probe id (plus 'Array_ID'); values are the log2 ratios as read,
            or 0 where a file lacks that probe.
        '''
        # Accumulates the union of probe ids seen across all files.
        columns_dict = {'Array_ID': 'Array_ID'}

        file_list = self.file_obj.get_all_probe_bind_file(input_path)
        file_count = len(file_list)

        probe_ary = []
        for temp_count, temp_file in enumerate(file_list, start=1):
            gpr_code = self.file_obj.get_gpr_code_from_path(temp_file)

            # One {probe_id: log2, ...} dict per file.
            probe_ary.append(
                self.region_40_file_reader_to_dict(temp_file, gpr_code[0],
                                                   columns_dict))

            print("%s/%s, File = %s" % (temp_count, file_count, temp_file))
            print("gpr_code = %s" % (gpr_code[0]))
            print("columns_dict len = %s" % (len(columns_dict)))

        # Pivot the per-file dicts into column lists, padding gaps with 0.
        data_dict = {}
        for probe_info_dict in probe_ary:
            for temp_key in columns_dict.keys():
                data_dict.setdefault(temp_key, []).append(
                    probe_info_dict.get(temp_key, 0))

        return pd.DataFrame.from_dict(data_dict)

    def cnv_data_reader(self,
                        input_file,
                        array_id,
                        colunm_tag='ID',
                        value_tag='log2'):
        data_df = self.region_40_file_reader(input_file)
        # data_frame_t = data_df.set_index(colunm_tag).T

        ### get certain row as value
        serial_log2 = data_df.loc[:, [colunm_tag, value_tag]]

        ### set array_id as index and transpose as columns
        serial_log2 = serial_log2.set_index(colunm_tag).T

        ### assign array_id to index label
        serial_log2 = serial_log2.rename(index={value_tag: array_id})

        ### adding array_id column
        serial_log2['Array_ID'] = serial_log2.index

        return serial_log2

    def cnv_data_reader_pipeline_bak(self, input_path):
        '''
        Read every all-probe-bind file under input_path with cnv_data_reader
        and stack the single-row frames into one data frame.

        Output:
            pandas data frame with one row per file, or None when no file
            is found.
        '''
        file_list = self.file_obj.get_all_probe_bind_file(input_path)
        file_count = len(file_list)

        frames = []
        for temp_count, temp_file in enumerate(file_list, start=1):
            gpr_code = self.file_obj.get_gpr_code_from_path(temp_file)
            frames.append(self.cnv_data_reader(temp_file, gpr_code[0]))

            print("%s/%s, File = %s" % (temp_count, file_count, temp_file))
            print("gpr_code = %s" % (gpr_code[0]))

        if not frames:
            return None

        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported replacement and avoids re-copying the frame per file.
        return pd.concat(frames)

    def cnv_data_reader_bak(self,
                            input_file,
                            array_id,
                            colunm_tag='ID',
                            value_tag='log2'):
        data_df = self.region_40_file_reader(input_file)

        ### set array_id as index and transpose as columns
        data_frame_t = data_df.set_index(colunm_tag).T

        ### get certain row as value
        serial_log2 = data_frame_t.loc[value_tag, :]

        ## convert serials to data_frame
        data_frame_t_log2 = serial_log2.to_frame()
        data_frame_t_log2 = data_frame_t_log2.T

        ### assign array_id to index label
        data_frame_t_log2 = data_frame_t_log2.rename(
            index={value_tag: array_id})

        ### adding array_id column
        data_frame_t_log2['Array_ID'] = data_frame_t_log2.index

        # print(type(data_frame_t_log2))

        return data_frame_t_log2

    def read_all_probe_bind_pipeline(self, input_path):
        '''
        Load every all-probe-bind file found under input_path.

        Output:
            result_dict: {}
                key: gpr_id (first element of the gpr code of the file),
                value: pandas dataframe from region_40_file_reader, which
                    is None when the file's header has a single column.
        '''
        probe_bind_files = self.file_obj.get_all_probe_bind_file(input_path)
        file_count = len(probe_bind_files)

        result_dict = {}
        for temp_count, temp_file in enumerate(probe_bind_files):
            print("%s/%s, File = %s" % (temp_count, file_count, temp_file))
            gpr_code = self.file_obj.get_gpr_code_from_path(temp_file)
            result_dict[gpr_code[0]] = self.region_40_file_reader(temp_file)

            print("gpr_code = %s" % (gpr_code))

        return result_dict

    def read_region_40_file_pipeline(self, input_path):
        '''
        Load every region40 file found under input_path.

        Output:
            result_dict: {}
                key: gpr_id (first element of the gpr code of the file),
                value: pandas dataframe from region_40_file_reader, which
                    is None when the file's header has a single column.
        '''
        result_dict = {}
        for region_file in self.file_obj.get_region_40_file(input_path):
            print("File = %s" % (region_file))
            gpr_code = self.file_obj.get_gpr_code_from_path(region_file)
            result_dict[gpr_code[0]] = self.region_40_file_reader(region_file)

            print("gpr_code = %s" % (gpr_code))

        return result_dict

    def region_40_file_reader_to_dict(self, input_file, array_id, record_dict):
        '''
        Function:
            Read a region_40/All_probe_bind result and record each probe id
            with its log2 ratio. The header decides whether the file holds
            data: data rows are read only when the header has more than one
            tab-separated column.
        Input:
            input_file: tab format input file (read as latin1).
            array_id: stored under the 'Array_ID' key of the result.
            record_dict: updated in place with every probe id seen
                (probe_id -> probe_id).
        Output:
            dict {'Array_ID': array_id, probe_id: log2 value as text, ...}.
            Only the 'Array_ID' entry is present for an empty file or a
            single-column header.
        '''
        probe_id_index = 0
        log2_index = 5

        probe_dict = {'Array_ID': array_id}
        # The context manager closes the file even if a row is too short.
        with open(input_file, 'r', encoding="latin1") as fh_input:
            # A default avoids StopIteration on a completely empty file.
            header_str = next(fh_input, '')
            if len(header_str.split('\t')) > 1:
                for temp_str in fh_input:
                    if not temp_str.strip('\r\n'):
                        # Skip blank lines (e.g. a trailing newline).
                        continue
                    temp_ary = temp_str.split('\t')
                    probe_id = temp_ary[probe_id_index]
                    probe_dict[probe_id] = temp_ary[log2_index]
                    record_dict[probe_id] = probe_id

        return probe_dict

    def region_40_file_reader(self, input_file):
        '''
		Function:
			根據header來判斷檔案內是否有資料。
		Input:
			tab format input file.
		Output:
			pandas data frame.
		'''
        result_df = None
        fh_input = open(input_file, 'r')
        header_str = next(fh_input)
        temp_ary = header_str.split('\t')
        fh_input.close()
        if len(temp_ary) > 1:
            result_df = self.pd_read_csv_data(input_file)

        return result_df

    def read_blast_count_data(self, sys_obj, input_dir):
        '''
        Load the blast-count tables under input_dir and return a two-column
        frame: "Specimen_ID" (the accession-number column with every ","
        and "." removed) and "Blast_count".

        Input:
            sys_obj: provides get_accessnum_index() and
                get_blast_count_index(), the positional indices of the two
                columns.
            input_dir: directory passed to pd_read_csv_data_from_dir.
        '''
        accessnum_index = sys_obj.get_accessnum_index()
        blast_count_index = sys_obj.get_blast_count_index()

        blast_value_temp = self.pd_read_csv_data_from_dir(input_dir).copy()

        # regex=False makes both replacements literal on every pandas
        # version; as a regex, "." would match (and delete) any character.
        # NOTE(review): .str requires the column to hold strings - confirm.
        cleaned_ids = (blast_value_temp.iloc[:, accessnum_index]
                       .str.replace(",", "", regex=False)
                       .str.replace(".", "", regex=False))
        blast_value_temp.iloc[:, accessnum_index] = cleaned_ids

        blast_count_refine = blast_value_temp.iloc[:, [
            accessnum_index, blast_count_index
        ]]
        blast_count_refine.columns = ["Specimen_ID", "Blast_count"]

        return blast_count_refine

    def pd_read_csv_data_from_dir(self, input_dir, extension_str='.csv'):
        '''
        Input:
            input_dir: '/input/dir'
            extension_str: extension handed to FileList.find_file.
        Output:
            one pandas data frame: every file found is read with
            pd_read_csv_data, the frames are stacked row-wise, and the
            index is renumbered 0..n-1.
        '''
        finder = FileList()
        frames = [
            self.pd_read_csv_data(found_file)
            for found_file in finder.find_file(input_dir, extension_str)
        ]

        stacked = pd.concat(frames, axis=0)

        # Replace the per-file indices with one running index.
        return stacked.set_index(np.arange(stacked.shape[0]))

    def pd_read_csv_data(self, input_file):
        """Read a tab-separated, latin1-encoded file into a pandas data frame."""
        return pd.read_csv(input_file, sep='\t', encoding="latin1")

    def read_csv_data(self, input_file, outcome_index=26):
        '''
        Read a comma-separated file whose first row is the header.

        Input:
            input_file: path of the csv file.
            outcome_index: column number of the outcome value.
        Output:
            features_ary: np.array of the header fields.
            data_x: np.array of all data rows (text fields).
            data_y: np.array holding column outcome_index of every row.
        '''
        # newline='' lets the csv module handle line endings itself, as its
        # documentation requires; `with` closes the file on any error.
        with open(input_file, 'r', newline='') as fh_input:
            fh_csv = csv.reader(fh_input, delimiter=',')

            # The first row is the header.
            features_ary = np.array(next(fh_csv))

            data_x = []
            data_y = []
            for row in fh_csv:
                data_x.append(row)
                data_y.append(row[outcome_index])

        return features_ary, np.array(data_x), np.array(data_y)

    def get_csv_data_from_dir(self,
                              input_dir,
                              outcome_index=26,
                              extension_str='.csv'):
        '''
        Read every matching file under input_dir with read_csv_data and
        stack the rows.

        Input:
            input_dir: '/input/dir'
            outcome_index: column number of the outcome value.
            extension_str: extension handed to FileList.find_file.
        Output:
            features_ary = np.array (header of the last file read)
            data_x = np.array
            data_y = np.array
            All three are empty arrays when no file matches.
        '''
        file_obj = FileList()

        file_ary = file_obj.find_file(input_dir, extension_str)

        # Defaults for a directory with no matching file; features_ary used
        # to be unbound in that case and the return raised UnboundLocalError.
        features_ary = np.array([])
        data_x = np.array([])
        data_y = np.array([])

        for temp_index, temp_file in enumerate(file_ary):
            features_ary, temp_data_x, temp_data_y = self.read_csv_data(
                temp_file, outcome_index)

            if temp_index == 0:
                data_x = temp_data_x
                data_y = temp_data_y
            else:
                data_x = np.concatenate((data_x, temp_data_x), axis=0)
                data_y = np.concatenate((data_y, temp_data_y), axis=0)

            print(temp_file)
            print(len(temp_data_x))
            print(len(data_x))

        return features_ary, data_x, data_y

    def pd_read_txt(self, data_dir, file_path):
        """Read a header-less tab-separated file into an Array_ID / tif_path frame.

        The file location is the plain concatenation data_dir + file_path.
        """
        full_path = data_dir + file_path
        column_names = ['Array_ID', 'tif_path']

        return pd.read_csv(full_path,
                           sep='\t',
                           header=None,
                           names=column_names)

    def tif_ary_reader(self, data_df, path_column, y_label):
        """
        Load the images listed in data_df as resized uint8 arrays.

        Input:
            data_df: data frame with one row per image. Rows are fetched
                with .loc[i] for i in 0..n-1, so a default integer index is
                assumed.
            path_column: column holding the image path.
            y_label: column holding the label.
        Output:
            list of [resized image array, label] pairs.
        """
        records_num = len(data_df.index)
        print('How many records:', records_num)

        data_ary = []
        for i in range(records_num):
            path = data_df.loc[i, path_column]
            y = data_df.loc[i, y_label]

            # np.array copies the pixels, so the file can be closed at once;
            # `with` also closes it when loading fails.
            with Image.open(path) as im:
                img_ary = np.array(im)

            # Scale the input, uint16 -> uint8. 65535 / 255 is 257, so the
            # quotient is clipped to 255 instead of overflowing uint8.
            img_ary = np.clip(np.divide(img_ary, 255), 0, 255).astype('uint8')

            if img_ary.shape[0] > img_ary.shape[1]:
                print('*** Img transpose ***:')
                print(i)
                print('Img shape origin:')
                print(img_ary.shape)
                img_ary = np.transpose(img_ary)
                print('After transpose:')
                print(img_ary.shape)

            # Rotation (image augmentation against class imbalance) is done
            # later in the flow, in data_processing.
            # cv2.resize takes (width, height), the reverse of array.shape.
            resized_img_ary = cv2.resize(img_ary, (1220, 432))

            data_ary.append([resized_img_ary, y])

        return data_ary

    def png_ary_reader(self, data_df, path_column, y_label):
        """
        Load the images listed in data_df, crop them to a fixed box and
        return them as resized arrays.

        Input:
            data_df: data frame with one row per image. Rows are fetched
                with .loc[i] for i in 0..n-1, so a default integer index is
                assumed.
            path_column: column holding the image path.
            y_label: column holding the label.
        Output:
            list of [resized image array, label] pairs.
        """
        records_num = len(data_df.index)
        print('How many records:', records_num)

        data_ary = []
        # (left, upper, right, lower). Left/right: cropped to the frame;
        # top/bottom: cropped to +/- 3.
        crop_box = (60, 100, 2369, 800)
        for i in range(records_num):
            print('Fig ', i, ' processing... \n')
            path = data_df.loc[i, path_column]
            y = data_df.loc[i, y_label]

            # np.array copies the cropped pixels, so the file can be closed
            # at once; `with` also closes it when loading fails.
            with Image.open(path) as im:
                img_ary = np.array(im.crop(crop_box))

            # After the crop the image is 2309 wide and 700 high.
            # cv2.resize takes (width, height), the reverse of array.shape.
            resized_img_ary = cv2.resize(img_ary, (1150, 400))

            data_ary.append([resized_img_ary, y])

        return data_ary