def OnDownload(self, e):
    """Download every word listed in the text box (one word per line).

    The download button is disabled while the handler runs.  Each word is
    passed to ``self.model.download``; words whose download raises are
    collected, reported through ``self.show_completed_dialog`` and written
    back into the text box so the user can retry them.

    Args:
        e: The event object supplied by the GUI toolkit (unused).
    """
    self.button_download.Enable(False)
    words_not_downloaded = []
    words_text = self.text_box.GetValue().strip()
    len_words_text = len(words_text)
    try:
        if len_words_text > 0:
            words = words_text.split('\n')
            progressbar_frame = Progressbar(len(words))
            progressbar_frame.Show(True)
            try:
                for word in words:
                    progressbar_frame.set_label('Downloading: ' + word)
                    progressbar_frame.step()
                    try:
                        self.model.download(word)
                    except Exception:
                        # Best effort: remember the word and keep going.
                        # ``Exception`` (not a bare ``except``) so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        words_not_downloaded.append(word)
            finally:
                # Never leave the progress frame on screen, even when
                # something unexpected interrupts the loop.
                progressbar_frame.Show(False)
        else:
            wx.MessageBox('Enter some words', 'Info',
                          wx.OK | wx.ICON_INFORMATION)
    finally:
        # The button must become usable again whatever happened above.
        self.button_download.Enable(True)
    # NOTE(review): the second argument is the character count of the
    # input text, not the number of words -- confirm which one
    # show_completed_dialog expects.
    self.show_completed_dialog(len(words_not_downloaded), len_words_text)
    self.text_box.SetValue('\n'.join(words_not_downloaded))
def OnDownload(self, e):
    """Download every word listed in the text box (one word per line).

    The download button is disabled while the handler runs.  Each word is
    passed to ``self.model.download``; words whose download raises are
    collected, reported through ``self.show_completed_dialog`` and written
    back into the text box so the user can retry them.

    Args:
        e: The event object supplied by the GUI toolkit (unused).
    """
    self.button_download.Enable(False)
    words_not_downloaded = []
    words_text = self.text_box.GetValue().strip()
    len_words_text = len(words_text)
    try:
        if len_words_text > 0:
            words = words_text.split('\n')
            progressbar_frame = Progressbar(len(words))
            progressbar_frame.Show(True)
            try:
                for word in words:
                    progressbar_frame.set_label('Downloading: ' + word)
                    progressbar_frame.step()
                    try:
                        self.model.download(word)
                    except Exception:
                        # Best effort: remember the word and keep going.
                        # ``Exception`` (not a bare ``except``) so that
                        # KeyboardInterrupt / SystemExit still propagate.
                        words_not_downloaded.append(word)
            finally:
                # Never leave the progress frame on screen, even when
                # something unexpected interrupts the loop.
                progressbar_frame.Show(False)
        else:
            wx.MessageBox('Enter some words', 'Info',
                          wx.OK | wx.ICON_INFORMATION)
    finally:
        # The button must become usable again whatever happened above.
        self.button_download.Enable(True)
    # NOTE(review): the second argument is the character count of the
    # input text, not the number of words -- confirm which one
    # show_completed_dialog expects.
    self.show_completed_dialog(len(words_not_downloaded), len_words_text)
    self.text_box.SetValue('\n'.join(words_not_downloaded))
print 'Ignoring unknown gene: %s' % rec.description continue if geneName in created_protein_names: print 'Ignoring duplicate gene: %s' % rec.description continue # create a new Protein object created_protein_names.add(geneName) protein_seq[geneName] = seq all_proteins.append(Protein(geneName, seq, params.K)) print print "Counted k-mer (k=%d) for %d different genes (proteins)." % ( params.K, len(all_proteins)) pb = Progressbar( 'Generating frequency dictionary for k-mers that appear more than once in protein' ) kmers_frequency = dict() # track popularity of kmer accross all proteins i = 0 for prot in all_proteins: i += 1 pb.update_progress(i, len(all_proteins)) for kmer in prot.repeating_non_overlapping_kmers: if kmer not in kmers_frequency: kmers_frequency[kmer] = set() # add the new protein only if it's dissimilar enough from all other # proteins that were already added and contain this kmer protein_names = kmers_frequency[
all_prots = g1 + g2 + g3 + g4 + g5 + g6 + g7 + g8 + g9 + g10 genes = set() genes.update(set(['HMX3', 'B7Z774', 'B4DF88', 'hCG_2039588', 'TLE3', 'BHLHE22', 'Q7KZE5', 'DKFZp762H2012', 'SP5', 'TLE4', 'TBX2', 'A0A1U9X8A2', 'PITX3', 'NKX2-3', 'B7Z4A9', 'B3KQ29', 'B4DP89', 'ZNF703', 'HOXA13', 'PBX3', 'EOMES', 'ZSWIM6', 'B7Z5Q0', 'ZIC2', 'FBXL17', 'SPTLC2', 'FOXB2', 'GSX2', 'SP8', 'Q53EU2', 'PBX1', 'FOXD3', 'IRF2BPL', 'EVX2', 'A8K350', 'Q6NW39', 'B4E2W3', 'GATA6', 'B3KUA2', 'A0A1U9X8A1', 'PBX2', 'NLK', 'B3KM67', 'NKX6-1', 'SOX21', 'RBM47', 'ID4', 'MAZ', 'FLJ20273', 'EGR2', 'RBM24', 'PRDM8', 'ZNF395'])) all_prots = list(genes) from random import shuffle shuffle(all_prots) for p in all_prots: print p # dilute similar proteins diluted_list = [] pb = Progressbar('Diluting proteins...') i = 0 for protein_name1 in all_prots: i += 1 pb.update_progress(i, len(all_prots)) redundantProt = False for protein_name in diluted_list: try: #print '%s: Checking similarity of %s and %s' % (kmer, protein_name, prot.geneName) if not utils.proteins_are_dissimilar(protein_name, protein_name1, protein_seq[protein_name], protein_seq[protein_name1]): redundantProt = True break except: continue if not redundantProt:
def startup(self):
    """
    Build the application window and show it.

    Copies ``default_cfg.json`` to ``cfg.json`` when the latter is missing,
    loads the latest profile, registers the Help-menu links, assembles all
    widget rows inside ``self.main_box``, shows the main window and finally
    runs the update check.
    """
    log = self.logger
    log.info(
        "Starting Application\n------------------------------------------------"
    )
    app_dir = str(self.paths.app)
    log.info(app_dir)

    self.facepack_dirs = {
        "African", "Asian", "Caucasian", "Central European", "EECA",
        "Italmed", "MENA", "MESA", "SAMed", "Scandinavian", "Seasian",
        "South American", "SpanMed", "YugoGreek"
    }
    self.mode_info = {
        "Overwrite": "Overwrites already replaced faces",
        "Preserve": "Preserves already replaced faces",
        "Generate": "Generates mapping from scratch."
    }

    # Make sure a user config exists before the profile manager reads it.
    user_cfg = app_dir + "/.user/cfg.json"
    os.makedirs(app_dir + "/.config", exist_ok=True)
    if not os.path.isfile(user_cfg):
        shutil.copyfile(app_dir + "/.user/default_cfg.json", user_cfg)

    log.info("Loading current profile")
    latest_profile = Config_Manager().get_latest_prf(user_cfg)
    self.profile_manager = Profile_Manager(latest_profile, app_dir)
    self.profile_manager.migrate_config()

    log.info("Creating GUI")
    self.main_box = toga.Box()
    log.info("Created main box")
    # NOTE(review): this webhook URL is a credential stored in source
    # control -- consider loading it from configuration instead.
    self.hook = "https://discord.com/api/webhooks/796137178328989768/ETMNtPVb-PHuZPayC5G5MZD24tdDi5jmG6jAgjZXg0FDOXjy-VIabATXPco05qLIr4ro"

    # CREATE MENUBAR
    def help_link(label, url, section):
        # The URL is bound as a default argument so each command keeps its own.
        return toga.Command(lambda e=None, u=url: self.open_link(u),
                            label=label,
                            group=toga.Group.HELP,
                            section=section)

    troubleshooting = help_link(
        'Troubleshooting',
        "https://github.com/Maradonna90/NewGAN-Manager/wiki/Troubleshooting",
        1)
    usage = help_link('User Guide',
                      "https://www.youtube.com/watch?v=iJqZNp0nomM", 0)
    faq = help_link(
        'FAQ', "https://github.com/Maradonna90/NewGAN-Manager/wiki/FAQ", 2)
    discord = help_link('Discord', "https://discord.gg/UfRpJVc", 3)
    self.commands.add(discord, faq, troubleshooting, usage)

    label_width = 125
    # Style shared by every stretching input widget in a row.
    row_style = {"direction": ROW, "padding": (0, 20), "flex": 1}

    # TOP Profiles
    create_row = toga.Box()
    log.info("Created prf_box")
    name_input = toga.TextInput()
    log.info("Created prf_inp")
    self.prfsel_box = toga.Box()
    create_label = toga.Label(text="Create Profile: ")
    create_label.style.update(width=label_width)
    select_label = toga.Label(text="Select Profile: ")
    select_label.style.update(width=label_width)
    profile_names = list(self.profile_manager.config["Profile"].keys())
    self.prfsel_lst = SourceSelection(items=profile_names,
                                      on_select=self._set_profile_status)
    self.prfsel_lst.value = self.profile_manager.cur_prf
    delete_btn = toga.Button(
        label="Delete",
        on_press=lambda e=None, c=self.prfsel_lst: self._delete_profile(c))
    create_btn = toga.Button(
        label="Create",
        on_press=lambda e=None, d=name_input, c=self.prfsel_lst:
        self._create_profile(d, c))

    self.main_box.add(create_row)
    for child in (create_label, name_input, create_btn):
        create_row.add(child)
    create_label.style.update(padding_top=7)
    name_input.style.update(**row_style)

    self.main_box.add(self.prfsel_box)
    for child in (select_label, self.prfsel_lst, delete_btn):
        self.prfsel_box.add(child)
    self.prfsel_lst.style.update(**row_style)
    select_label.style.update(padding_top=7)

    # MID Path selections
    dir_row = toga.Box()
    dir_label = toga.Label(text="Select Image Directory: ")
    dir_label.style.update(width=label_width)
    self.dir_inp = toga.TextInput(
        readonly=True, initial=self.profile_manager.prf_cfg['img_dir'])
    self.dir_inp.style.update(**row_style)
    self.dir_btn = toga.Button(label="...",
                               on_press=self.action_select_folder_dialog,
                               enabled=False)

    rtf_row = toga.Box()
    rtf_label = toga.Label(text="RTF File: ")
    rtf_label.style.update(width=label_width)
    self.rtf_inp = toga.TextInput(
        readonly=True, initial=self.profile_manager.prf_cfg['rtf'])
    self.rtf_inp.style.update(**row_style)
    self.rtf_btn = toga.Button(label="...",
                               on_press=self.action_open_file_dialog,
                               enabled=False)

    self.main_box.add(dir_row)
    self.main_box.add(rtf_row)
    for child in (dir_label, self.dir_inp, self.dir_btn):
        dir_row.add(child)
    for child in (rtf_label, self.rtf_inp, self.rtf_btn):
        rtf_row.add(child)
    dir_label.style.update(padding_top=7)
    rtf_label.style.update(padding_top=7)

    # Generation mode
    mode_row = toga.Box()
    self.genmde_lab = toga.Label(text="Mode: ")
    self.genmde_lab.style.update(width=label_width)
    self.genmdeinfo_lab = toga.Label(text=self.mode_info["Generate"])
    self.gendup = toga.Switch(label="Allow Duplicates?")
    self.genmde_lst = SourceSelection(items=list(self.mode_info.keys()),
                                      on_select=self.update_label)
    self.genmde_lst.value = "Generate"
    self.genmde_lst.style.update(**row_style)
    self.genmde_lab.style.update(padding_top=7)
    self.genmdeinfo_lab.style.update(padding_top=7)
    self.gendup.style.update(padding_top=7, padding_left=20)
    for child in (self.genmde_lab, self.genmde_lst, self.genmdeinfo_lab,
                  self.gendup):
        mode_row.add(child)
    self.main_box.add(mode_row)

    # BOTTOM Generation
    gen_column = toga.Box()
    self.gen_btn = toga.Button(label="Replace Faces",
                               on_press=self._replace_faces,
                               enabled=False)
    self.gen_btn.style.update(padding_bottom=20)
    self.gen_lab = toga.Label(text="")
    self.gen_prg = Progressbar(label=self.gen_lab)
    for child in (self.gen_btn, self.gen_lab, self.gen_prg):
        gen_column.add(child)
    self.main_box.add(gen_column)
    self.gen_prg.style.update(width=570, alignment="center")
    self.gen_lab.style.update(padding_top=20,
                              padding_bottom=20,
                              width=100,
                              alignment="center")

    # Report bad image
    report_row = toga.Box()
    self.rep_lab = toga.Label(text="Player UID: ")
    self.rep_lab.style.update(width=label_width)
    self.rep_inp = toga.TextInput(on_change=self.change_image)
    self.rep_img = toga.ImageView(toga.Image("resources/logo.png"))
    self.rep_img.style.update(height=180, width=180)
    self.rep_btn = toga.Button(label="Report",
                               on_press=self.send_report,
                               enabled=False)
    for child in (self.rep_lab, self.rep_inp, self.rep_img, self.rep_btn):
        report_row.add(child)
    self.main_box.add(report_row)
    self.rep_lab.style.update(padding_top=10)
    self.rep_inp.style.update(**row_style)

    # END config
    for row in (self.prfsel_box, dir_row, create_row, rtf_row, mode_row):
        row.style.update(padding_bottom=20)
    report_row.style.update(padding_top=20)
    gen_column.style.update(direction=COLUMN, alignment='center')
    self.main_box.style.update(direction=COLUMN,
                               padding=30,
                               alignment='center')

    self.main_window = toga.MainWindow(title=self.formal_name,
                                       size=(1000, 600))
    self.main_window.content = self.main_box
    self.main_window.show()
    self.check_for_update()
class NewGANManager(toga.App):
    """Toga application that maps players from an RTF export to face images.

    The window lets the user manage profiles, pick an image directory and an
    RTF file, run the face replacement and report a bad image by player UID.
    """

    def __init__(self, log):
        """Store the logger used by every handler.

        Args:
            log: Logger-like object providing ``info`` and ``error``.
        """
        super().__init__()
        self.logger = log

    def _user_file(self, filename):
        """Return the path of *filename* inside the app's ``.user`` folder."""
        return str(self.paths.app) + "/.user/" + filename

    def startup(self):
        """
        Build the application window and show it.

        Copies ``default_cfg.json`` to ``cfg.json`` when the latter is
        missing, loads the latest profile, registers the Help-menu links,
        assembles all widget rows inside ``self.main_box``, shows the main
        window and finally runs the update check.
        """
        log = self.logger
        log.info(
            "Starting Application\n------------------------------------------------"
        )
        app_dir = str(self.paths.app)
        log.info(app_dir)

        self.facepack_dirs = {
            "African", "Asian", "Caucasian", "Central European", "EECA",
            "Italmed", "MENA", "MESA", "SAMed", "Scandinavian", "Seasian",
            "South American", "SpanMed", "YugoGreek"
        }
        self.mode_info = {
            "Overwrite": "Overwrites already replaced faces",
            "Preserve": "Preserves already replaced faces",
            "Generate": "Generates mapping from scratch."
        }

        # Make sure a user config exists before the profile manager reads it.
        user_cfg = self._user_file("cfg.json")
        os.makedirs(app_dir + "/.config", exist_ok=True)
        if not os.path.isfile(user_cfg):
            shutil.copyfile(self._user_file("default_cfg.json"), user_cfg)

        log.info("Loading current profile")
        latest_profile = Config_Manager().get_latest_prf(user_cfg)
        self.profile_manager = Profile_Manager(latest_profile, app_dir)
        self.profile_manager.migrate_config()

        log.info("Creating GUI")
        self.main_box = toga.Box()
        log.info("Created main box")
        # NOTE(review): this webhook URL is a credential stored in source
        # control -- consider loading it from configuration instead.
        self.hook = "https://discord.com/api/webhooks/796137178328989768/ETMNtPVb-PHuZPayC5G5MZD24tdDi5jmG6jAgjZXg0FDOXjy-VIabATXPco05qLIr4ro"

        # CREATE MENUBAR
        def help_link(label, url, section):
            # The URL is bound as a default argument so each command keeps
            # its own target.
            return toga.Command(lambda e=None, u=url: self.open_link(u),
                                label=label,
                                group=toga.Group.HELP,
                                section=section)

        troubleshooting = help_link(
            'Troubleshooting',
            "https://github.com/Maradonna90/NewGAN-Manager/wiki/Troubleshooting",
            1)
        usage = help_link('User Guide',
                          "https://www.youtube.com/watch?v=iJqZNp0nomM", 0)
        faq = help_link(
            'FAQ', "https://github.com/Maradonna90/NewGAN-Manager/wiki/FAQ",
            2)
        discord = help_link('Discord', "https://discord.gg/UfRpJVc", 3)
        self.commands.add(discord, faq, troubleshooting, usage)

        label_width = 125
        # Style shared by every stretching input widget in a row.
        row_style = {"direction": ROW, "padding": (0, 20), "flex": 1}

        # TOP Profiles
        create_row = toga.Box()
        log.info("Created prf_box")
        name_input = toga.TextInput()
        log.info("Created prf_inp")
        self.prfsel_box = toga.Box()
        create_label = toga.Label(text="Create Profile: ")
        create_label.style.update(width=label_width)
        select_label = toga.Label(text="Select Profile: ")
        select_label.style.update(width=label_width)
        profile_names = list(self.profile_manager.config["Profile"].keys())
        self.prfsel_lst = SourceSelection(items=profile_names,
                                          on_select=self._set_profile_status)
        self.prfsel_lst.value = self.profile_manager.cur_prf
        delete_btn = toga.Button(
            label="Delete",
            on_press=lambda e=None, c=self.prfsel_lst: self._delete_profile(c))
        create_btn = toga.Button(
            label="Create",
            on_press=lambda e=None, d=name_input, c=self.prfsel_lst:
            self._create_profile(d, c))

        self.main_box.add(create_row)
        for child in (create_label, name_input, create_btn):
            create_row.add(child)
        create_label.style.update(padding_top=7)
        name_input.style.update(**row_style)

        self.main_box.add(self.prfsel_box)
        for child in (select_label, self.prfsel_lst, delete_btn):
            self.prfsel_box.add(child)
        self.prfsel_lst.style.update(**row_style)
        select_label.style.update(padding_top=7)

        # MID Path selections
        dir_row = toga.Box()
        dir_label = toga.Label(text="Select Image Directory: ")
        dir_label.style.update(width=label_width)
        self.dir_inp = toga.TextInput(
            readonly=True, initial=self.profile_manager.prf_cfg['img_dir'])
        self.dir_inp.style.update(**row_style)
        self.dir_btn = toga.Button(label="...",
                                   on_press=self.action_select_folder_dialog,
                                   enabled=False)

        rtf_row = toga.Box()
        rtf_label = toga.Label(text="RTF File: ")
        rtf_label.style.update(width=label_width)
        self.rtf_inp = toga.TextInput(
            readonly=True, initial=self.profile_manager.prf_cfg['rtf'])
        self.rtf_inp.style.update(**row_style)
        self.rtf_btn = toga.Button(label="...",
                                   on_press=self.action_open_file_dialog,
                                   enabled=False)

        self.main_box.add(dir_row)
        self.main_box.add(rtf_row)
        for child in (dir_label, self.dir_inp, self.dir_btn):
            dir_row.add(child)
        for child in (rtf_label, self.rtf_inp, self.rtf_btn):
            rtf_row.add(child)
        dir_label.style.update(padding_top=7)
        rtf_label.style.update(padding_top=7)

        # Generation mode
        mode_row = toga.Box()
        self.genmde_lab = toga.Label(text="Mode: ")
        self.genmde_lab.style.update(width=label_width)
        self.genmdeinfo_lab = toga.Label(text=self.mode_info["Generate"])
        self.gendup = toga.Switch(label="Allow Duplicates?")
        self.genmde_lst = SourceSelection(items=list(self.mode_info.keys()),
                                          on_select=self.update_label)
        self.genmde_lst.value = "Generate"
        self.genmde_lst.style.update(**row_style)
        self.genmde_lab.style.update(padding_top=7)
        self.genmdeinfo_lab.style.update(padding_top=7)
        self.gendup.style.update(padding_top=7, padding_left=20)
        for child in (self.genmde_lab, self.genmde_lst, self.genmdeinfo_lab,
                      self.gendup):
            mode_row.add(child)
        self.main_box.add(mode_row)

        # BOTTOM Generation
        gen_column = toga.Box()
        self.gen_btn = toga.Button(label="Replace Faces",
                                   on_press=self._replace_faces,
                                   enabled=False)
        self.gen_btn.style.update(padding_bottom=20)
        self.gen_lab = toga.Label(text="")
        self.gen_prg = Progressbar(label=self.gen_lab)
        for child in (self.gen_btn, self.gen_lab, self.gen_prg):
            gen_column.add(child)
        self.main_box.add(gen_column)
        self.gen_prg.style.update(width=570, alignment="center")
        self.gen_lab.style.update(padding_top=20,
                                  padding_bottom=20,
                                  width=100,
                                  alignment="center")

        # Report bad image
        report_row = toga.Box()
        self.rep_lab = toga.Label(text="Player UID: ")
        self.rep_lab.style.update(width=label_width)
        self.rep_inp = toga.TextInput(on_change=self.change_image)
        self.rep_img = toga.ImageView(toga.Image("resources/logo.png"))
        self.rep_img.style.update(height=180, width=180)
        self.rep_btn = toga.Button(label="Report",
                                   on_press=self.send_report,
                                   enabled=False)
        for child in (self.rep_lab, self.rep_inp, self.rep_img,
                      self.rep_btn):
            report_row.add(child)
        self.main_box.add(report_row)
        self.rep_lab.style.update(padding_top=10)
        self.rep_inp.style.update(**row_style)

        # END config
        for row in (self.prfsel_box, dir_row, create_row, rtf_row, mode_row):
            row.style.update(padding_bottom=20)
        report_row.style.update(padding_top=20)
        gen_column.style.update(direction=COLUMN, alignment='center')
        self.main_box.style.update(direction=COLUMN,
                                   padding=30,
                                   alignment='center')

        self.main_window = toga.MainWindow(title=self.formal_name,
                                           size=(1000, 600))
        self.main_window.content = self.main_box
        self.main_window.show()
        self.check_for_update()

    def open_link(self, url):
        """Open *url* with ``webbrowser.open``."""
        webbrowser.open(url)

    def update_label(self, widget):
        """Show the description of the mode selected in *widget*."""
        self.logger.info("Updating generation label")
        self.genmdeinfo_lab.text = self.mode_info[widget.value]

    def set_btns(self, value):
        """Enable or disable the action buttons.

        While the current profile is "No Profile" the buttons are always
        disabled, whatever *value* is.
        """
        no_profile = self.profile_manager.cur_prf == "No Profile"
        enabled = False if no_profile else value
        for btn in (self.gen_btn, self.dir_btn, self.rtf_btn, self.rep_btn):
            btn.enabled = enabled

    def _set_profile_status(self, e):
        """Load the profile selected in *e* unless it is empty or unchanged."""
        self.logger.info("switch profile: {}".format(e.value))
        if e.value is None:
            self.logger.info("catch none {}".format(
                self.profile_manager.cur_prf))
            return
        if e.value == self.profile_manager.cur_prf:
            self.logger.info("catch same values")
            return
        self.profile_manager.load_profile(e.value)
        self._refresh_inp()
        self.set_btns(True)
        Config_Manager().save_config(self._user_file("cfg.json"),
                                     self.profile_manager.config)

    def _refresh_inp(self, clear=False):
        """Clear both path inputs, or reload them from the profile config."""
        self.logger.info("Refresh Input Buttons")
        if clear:
            self.dir_inp.clear()
            self.rtf_inp.clear()
        else:
            self.dir_inp.value = self.profile_manager.prf_cfg['img_dir']
            self.rtf_inp.value = self.profile_manager.prf_cfg['rtf']

    def _create_profile(self, ent, c):
        """Create a profile named after the text in *ent* and list it in *c*."""
        name = ent.value
        self.profile_manager.create_profile(name)
        ent.clear()
        c.add_item(name)

    def _delete_profile(self, c):
        """Delete the profile selected in *c* and reset the dependent widgets."""
        prf = c.value
        self.profile_manager.delete_profile(prf)
        c.remove_item(prf)
        self._refresh_inp(True)
        self.set_btns(False)

    def _throw_error(self, msg):
        """Log *msg* and show it in an error dialog."""
        self.logger.info("Error window {}:".format(msg))
        self.main_window.error_dialog('Error', msg)

    def _show_info(self, msg):
        """Log *msg* and show it in an info dialog."""
        self.logger.info("Info window: {}".format(msg))
        self.main_window.info_dialog("Info", msg)

    def _save_profile_config(self, profile=None):
        """Write the profile config to ``.user/<profile>.json``.

        *profile* defaults to the profile manager's current profile.
        """
        if profile is None:
            profile = self.profile_manager.cur_prf
        Config_Manager().save_config(self._user_file(profile + ".json"),
                                     self.profile_manager.prf_cfg)

    def action_select_folder_dialog(self, widget):
        """Ask for the image root folder and store it in the profile."""
        self.logger.info("Select Folder...")
        try:
            path_names = self.main_window.select_folder_dialog(
                title="Select image root folder")
            img_dir = path_names[0] + "/"
            self.dir_inp.value = img_dir
            self.profile_manager.prf_cfg['img_dir'] = img_dir
            self._save_profile_config()
        except Exception:
            # Deliberately best effort (presumably the dialog raises when
            # it is cancelled), but leave a trace instead of hiding it.
            self.logger.info("Folder selection aborted", exc_info=True)

    def action_open_file_dialog(self, widget):
        """Ask for the RTF file and store it (or an empty path) in the profile."""
        self.logger.info("Select File...")
        try:
            fname = self.main_window.open_file_dialog(title="Open RTF file",
                                                      multiselect=False,
                                                      file_types=['rtf'])
            self.logger.info("Created file-dialog")
            chosen = fname is not None
            fname = str(fname) if chosen else ""
            self.rtf_inp.value = fname
            self.profile_manager.prf_cfg['rtf'] = fname
            if chosen:
                self.logger.info("RTF file: " + fname)
            self._save_profile_config()
        except Exception:
            self.logger.error("Fatal error in main loop", exc_info=True)

    def _replace_faces(self, widget):
        """Validate the inputs, then generate and persist the face mapping.

        Shows an error dialog and returns early when the RTF file, the
        image directory or one of the expected sub-folders is missing, or
        when the RTF file is invalid.  Once processing has started, the
        progress bar is stopped and the buttons are re-enabled however the
        run ends.
        """
        self.logger.info("Start Replace Faces")
        # get values from UI elements
        rtf = self.profile_manager.prf_cfg['rtf']
        img_dir = self.profile_manager.prf_cfg['img_dir']
        profile = self.profile_manager.cur_prf
        mode = self.genmde_lst.value

        if not os.path.isfile(rtf):
            self._throw_error("The RTF file doesn't exist!")
            self.gen_prg.stop()
            self.profile_manager.prf_cfg['rtf'] = ''
            return
        if not os.path.isdir(img_dir):
            self._throw_error("The image directory doesn't exist!")
            self.gen_prg.stop()
            self.profile_manager.prf_cfg['img_dir'] = ''
            return

        # Check if valid image_directory contains all the needed subfolders
        img_dirs = {
            entry.name for entry in os.scandir(img_dir) if entry.is_dir()
        }
        missing = self.facepack_dirs - img_dirs
        if missing:
            # Report the alphabetically first folder so the message is
            # deterministic (set iteration order is not).
            self._throw_error(
                "Folder {} is missing in the image directory".format(
                    min(missing)))
            self.gen_prg.stop()
            return

        self.logger.info("rtf: {}".format(rtf))
        self.logger.info("img_dir: {}".format(img_dir))
        self.logger.info("profile: {}".format(profile))
        self.logger.info("mode: {}".format(mode))

        self.set_btns(False)
        self.gen_prg.start()
        try:
            self.gen_prg.update_label("Parsing RTF")
            rtf_parser = RTF_Parser()
            if not rtf_parser.is_rtf_valid(rtf):
                self._throw_error("The RTF file is invalid!")
                return
            rtf_data = rtf_parser.parse_rtf(rtf)
            self.gen_prg.update_progress(20)

            self.gen_prg.update_label("Map player to ethnicity")
            mapping_data = Mapper(img_dir,
                                  self.profile_manager).generate_mapping(
                                      rtf_data, mode, self.gendup.is_on)
            self.gen_prg.update_progress(60)

            self.gen_prg.update_label("Generate config.xml")
            self.profile_manager.write_xml(mapping_data)

            # save profile metadata (used pics and config.xml)
            self.gen_prg.update_label("Save metadata for profile")
            self.gen_prg.update_progress(10)
            self._save_profile_config(profile)
            self.gen_prg.update_progress(10)

            self.gen_prg.update_label("Finished! :)")
            self._show_info("Finished! :)")
        finally:
            # Without this, an invalid RTF file or an exception in the
            # pipeline left the progress bar running and every button
            # disabled.
            self.gen_prg.stop()
            self.set_btns(True)

    def change_image(self, id):
        """Preview the image mapped to the UID typed into widget *id*.

        Nothing happens for UIDs shorter than 7 characters; lookup or load
        failures are logged and otherwise ignored.
        """
        self.logger.info("try to change image preview")
        uid = id.value
        if len(uid) < 7:
            return
        img_dir = self.profile_manager.prf_cfg['img_dir']
        try:
            img_path = XML_Parser().get_imgpath_from_uid(
                img_dir + "config.xml", uid)
            img_path = img_dir + img_path + ".png"
            self.rep_img.image = toga.Image(img_path)
            self.logger.info("change image preview to: {}".format(img_path))
        except Exception as e:
            self.logger.info("changing image preview failed!")
            self.logger.info(e)

    def send_report(self, e):
        """Report the entered UID, then reset the preview image and the input.

        Nothing happens for UIDs shorter than 7 characters.
        """
        uid = self.rep_inp.value
        if len(uid) < 7:
            return
        rep = Reporter(self.hook,
                       self.profile_manager.prf_cfg['img_dir'] + "config.xml")
        if rep.send_report(uid):
            self._show_info("Thanks for Reporting!")
        else:
            self._throw_error("Player with ID {} doesn't exist!".format(uid))
        self.rep_img.image = toga.Image("resources/logo.png")
        self.rep_inp.value = ""

    def check_for_update(self):
        """Compare the published version with ``self.version``.

        When they differ, an info dialog is shown and the releases page is
        opened.  Network or HTTP failures are logged and ignored so that a
        missing connection cannot abort ``startup``.
        """
        try:
            r = requests.get(
                "https://raw.githubusercontent.com/Maradonna90/NewGAN-Manager/master/version",
                timeout=10)
            # An error page must not be mistaken for a version string.
            r.raise_for_status()
        except requests.RequestException:
            self.logger.info("Update check failed", exc_info=True)
            return
        if r.text.strip() != self.version:
            self._show_info("There is a new version. Please Update!")
            self.open_link(
                "https://github.com/Maradonna90/NewGAN-Manager/releases/latest"
            )
def main(proteome_file, similar_diluted):
    """Count in how many proteins each k-mer occurs and write a CSV report.

    Reads the FASTA file *proteome_file*, builds one ``Protein`` per distinct
    gene name (falling back to the unique identifier when the gene name is
    empty), and records for every k-mer the set of gene names containing it.
    The k-mers shared by at least five proteins are written, most frequent
    first, to a timestamped CSV file next to *proteome_file*.

    Args:
        proteome_file: Path of the FASTA file to read.
        similar_diluted: When true, a protein is not counted for a k-mer if
            ``utils.proteins_are_dissimilar`` reports it as not dissimilar
            to a protein already counted for that k-mer.
    """
    # Python 2 module: print calls take a single pre-formatted string so
    # the output is the same as with the print statement.
    import csv
    import datetime

    # k-mers found in fewer proteins than this are left out of the report.
    min_proteins_per_kmer = 5

    all_proteins = []  # all_proteins[i] = PROTEIN()_OBJECT
    protein_seq = {}  # protein_seq[GENE_NAME] = AMINO_ACID_SEQUENCE
    created_protein_names = set()  # prevents two objects for the same gene
    duplicate_genes_ignored = 0

    print("Reading Uniprot file and generating k-mers list for each protein...")
    # ``with`` closes the FASTA handle; it used to be left open.
    with open(proteome_file) as fasta_handle:
        for rec in SeqIO.parse(fasta_handle, 'fasta'):
            seq = str(rec.seq)
            unique_id, _entry, _protein, _organism, gene_name = \
                utils.parse_UniProtKB_header(rec.description)
            if gene_name == '':
                # Uncharacterized gene: identify it by its unique identifier.
                gene_name = unique_id
            if gene_name in created_protein_names:
                duplicate_genes_ignored += 1
                continue
            # create a new Protein object
            created_protein_names.add(gene_name)
            protein_seq[gene_name] = seq
            all_proteins.append(Protein(gene_name, seq, params.K))

    protein_count = len(all_proteins)
    print("")
    print("Ignored %d duplicate genes." % duplicate_genes_ignored)
    print("Counted k-mer (k=%d) for %d different genes (proteins)." %
          (params.K, protein_count))

    pb = Progressbar('Generating frequency dictionary for k-mers')
    kmers_frequency = {}  # k-mer -> set of gene names that contain it
    for done, prot in enumerate(all_proteins, 1):
        pb.update_progress(done, protein_count)
        for kmer in prot.kmers:
            sharing_proteins = kmers_frequency.setdefault(kmer, set())
            # add the new protein only if it's dissimilar enough from all
            # other proteins that were already added and contain this kmer
            is_redundant = similar_diluted and any(
                not utils.proteins_are_dissimilar(
                    other_name, prot.geneName, protein_seq[other_name],
                    prot.seq) for other_name in sharing_proteins)
            if not is_redundant:
                sharing_proteins.add(prot.geneName)

    print("Sorting frequent k-mers by frequency...")
    most_frequent_kmers = sorted(kmers_frequency,
                                 key=lambda k: len(kmers_frequency[k]),
                                 reverse=True)

    print("Writing results to file...")
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    filename_without_extension = os.path.splitext(
        os.path.basename(proteome_file))[0]
    dilution_status = ('with dilution'
                       if similar_diluted else 'without dilution')
    outfile = "{0} - frequent k{1}-mers - {2} - {3}.csv".format(
        filename_without_extension, params.K, dilution_status, timestamp)
    outfile = os.path.join(os.path.dirname(proteome_file), outfile)

    # Binary mode is what the Python 2 csv module expects.
    with open(outfile, "wb") as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow([
            'k-mer', 'number of proteins', 'percentage', 'all',
            '(Out of %d proteins in total)' % protein_count
        ])
        for kmer in most_frequent_kmers:
            total_proteins = len(kmers_frequency[kmer])
            if total_proteins < min_proteins_per_kmer:
                # Sorted by descending count, so nothing further qualifies.
                break
            percentage = round(float(total_proteins) / protein_count, 6)
            writer.writerow([
                kmer, total_proteins, percentage,
                list(kmers_frequency[kmer])
            ])
def startup(self):
    """
    Build the application window and show it.

    Loads the latest profile, then assembles the profile, path, mode,
    generation and report rows inside ``self.main_box`` and shows the main
    window.
    """
    log = self.logger
    log.info(
        "Starting Application\n------------------------------------------------"
    )
    app_dir = str(self.paths.app)
    log.info(app_dir)

    self.mode_info = {
        "Overwrite": "Overwrites already replaced faces",
        "Preserve": "Preserves already replaced faces",
        "Generate": "Generates mapping from scratch."
    }
    os.makedirs(app_dir + "/.config", exist_ok=True)

    log.info("Loading current profile")
    latest_profile = Config_Manager().get_latest_prf(app_dir +
                                                     "/.user/cfg.json")
    self.profile_manager = Profile_Manager(latest_profile, app_dir)
    self.profile_manager.migrate_config()

    log.info("Creating GUI")
    self.main_box = toga.Box()
    log.info("Created main box")

    label_width = 125
    # Style shared by every stretching input widget in a row.
    row_style = {"direction": ROW, "padding": (0, 20), "flex": 1}

    # TOP Profiles
    create_row = toga.Box()
    log.info("Created prf_box")
    name_input = toga.TextInput()
    log.info("Created prf_inp")
    self.prfsel_box = toga.Box()
    create_label = toga.Label(text="Create Profile: ")
    create_label.style.update(width=label_width)
    select_label = toga.Label(text="Select Profile: ")
    select_label.style.update(width=label_width)
    profile_names = list(self.profile_manager.config["Profile"].keys())
    self.prfsel_lst = SourceSelection(items=profile_names,
                                      on_select=self._set_profile_status)
    self.prfsel_lst.value = self.profile_manager.cur_prf
    delete_btn = toga.Button(
        label="Delete",
        on_press=lambda e=None, c=self.prfsel_lst: self._delete_profile(c))
    create_btn = toga.Button(
        label="Create",
        on_press=lambda e=None, d=name_input, c=self.prfsel_lst:
        self._create_profile(d, c))

    self.main_box.add(create_row)
    for child in (create_label, name_input, create_btn):
        create_row.add(child)
    create_label.style.update(padding_top=7)
    name_input.style.update(**row_style)

    self.main_box.add(self.prfsel_box)
    for child in (select_label, self.prfsel_lst, delete_btn):
        self.prfsel_box.add(child)
    self.prfsel_lst.style.update(**row_style)
    select_label.style.update(padding_top=7)

    # MID Path selections
    dir_row = toga.Box()
    dir_label = toga.Label(text="Select Image Directory: ")
    dir_label.style.update(width=label_width)
    self.dir_inp = toga.TextInput(
        readonly=True, initial=self.profile_manager.prf_cfg['img_dir'])
    self.dir_inp.style.update(**row_style)
    self.dir_btn = toga.Button(label="...",
                               on_press=self.action_select_folder_dialog,
                               enabled=False)

    rtf_row = toga.Box()
    rtf_label = toga.Label(text="RTF File: ")
    rtf_label.style.update(width=label_width)
    self.rtf_inp = toga.TextInput(
        readonly=True, initial=self.profile_manager.prf_cfg['rtf'])
    self.rtf_inp.style.update(**row_style)
    self.rtf_btn = toga.Button(label="...",
                               on_press=self.action_open_file_dialog,
                               enabled=False)

    self.main_box.add(dir_row)
    self.main_box.add(rtf_row)
    for child in (dir_label, self.dir_inp, self.dir_btn):
        dir_row.add(child)
    for child in (rtf_label, self.rtf_inp, self.rtf_btn):
        rtf_row.add(child)
    dir_label.style.update(padding_top=7)
    rtf_label.style.update(padding_top=7)

    # Generation mode
    mode_row = toga.Box()
    self.genmde_lab = toga.Label(text="Mode: ")
    self.genmde_lab.style.update(width=label_width)
    self.genmdeinfo_lab = toga.Label(text=self.mode_info["Generate"])
    self.genmde_lst = SourceSelection(items=list(self.mode_info.keys()),
                                      on_select=self.update_label)
    self.genmde_lst.value = "Generate"
    self.genmde_lst.style.update(**row_style)
    self.genmde_lab.style.update(padding_top=7)
    self.genmdeinfo_lab.style.update(padding_top=7)
    for child in (self.genmde_lab, self.genmde_lst, self.genmdeinfo_lab):
        mode_row.add(child)
    self.main_box.add(mode_row)

    # BOTTOM Generation
    gen_column = toga.Box()
    self.gen_btn = toga.Button(label="Replace Faces",
                               on_press=self._replace_faces,
                               enabled=False)
    self.gen_btn.style.update(padding_bottom=20)
    self.gen_lab = toga.Label(text="")
    self.gen_prg = Progressbar(label=self.gen_lab)
    for child in (self.gen_btn, self.gen_lab, self.gen_prg):
        gen_column.add(child)
    self.main_box.add(gen_column)
    self.gen_prg.style.update(width=570, alignment="center")
    self.gen_lab.style.update(padding_top=20,
                              padding_bottom=20,
                              width=100,
                              alignment="center")

    # Report bad image
    report_row = toga.Box()
    self.rep_lab = toga.Label(text="Player UID: ")
    self.rep_lab.style.update(width=label_width)
    self.rep_inp = toga.TextInput(on_change=self.change_image)
    self.rep_img = toga.ImageView(toga.Image("resources/logo.png"))
    self.rep_img.style.update(height=180, width=180)
    self.rep_btn = toga.Button(label="Report",
                               on_press=self.send_report,
                               enabled=False)
    for child in (self.rep_lab, self.rep_inp, self.rep_img, self.rep_btn):
        report_row.add(child)
    self.main_box.add(report_row)
    self.rep_lab.style.update(padding_top=10)
    self.rep_inp.style.update(**row_style)

    # END config
    for row in (self.prfsel_box, dir_row, create_row, rtf_row, mode_row):
        row.style.update(padding_bottom=20)
    report_row.style.update(padding_top=20)
    gen_column.style.update(direction=COLUMN, alignment='center')
    self.main_box.style.update(direction=COLUMN,
                               padding=30,
                               alignment='center')

    self.main_window = toga.MainWindow(title=self.formal_name,
                                       size=(1000, 600))
    self.main_window.content = self.main_box
    self.main_window.show()
# ORIGINAL_DATASET_PATH = 'dataset/recent_tweets_test/houston_tweets.csv' # PROCESSED_DATASET_PATH = 'dataset/recent_tweets_test/houston_tweets-extended.csv' ORIGINAL_DATASET_PATH = 'dataset/recent_tweets_test/miami_tweets.csv' PROCESSED_DATASET_PATH = 'dataset/recent_tweets_test/miami_tweets-extended.csv' # Read original dataset logger.info('Reading from %s' % ORIGINAL_DATASET_PATH) orig_dataset = [ tweetRec for tweetRec in csv.DictReader(open(ORIGINAL_DATASET_PATH, 'rb')) ] total_tweets = len(orig_dataset) logger.info('%d rows fetched' % total_tweets) # Write new dataset logger.info('Writing to %s' % PROCESSED_DATASET_PATH) pb = Progressbar('Building extended dataset') with open(PROCESSED_DATASET_PATH, 'wb') as csvfile: dataset_keys = orig_dataset[0].keys() + ['link_url1', 'link_uri1', 'link_title1',\ 'link_url2', 'link_uri2', 'link_title2',\ 'link_url3', 'link_uri3', 'link_title3'] csvwriter = csv.DictWriter(csvfile, fieldnames=dataset_keys) csvwriter.writeheader() for i in xrange(len(orig_dataset)): pb.update_progress(i, total_tweets) tweet = orig_dataset[i]['text'] try: # Step 1 - Check if there are any links in tweet tweet_parser = ttp.Parser().parse(tweet)
class NewGANManager(toga.App):
    """Toga application that maps players from an RTF export to face images.

    The window offers profile creation/selection/deletion, selection of the
    image directory and RTF file, a "Replace Faces" action with a progress
    bar, and a "Report" action that uploads the previewed image to a webhook.
    """

    def __init__(self, log):
        """Initialise the Toga app and keep the logger.

        Args:
            log: Logger-like object; this class calls ``info`` and ``error``
                on it (``error`` with ``exc_info=True``).
        """
        super().__init__()
        self.logger = log

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _user_cfg_path(self, name):
        """Return the path of ``<app dir>/.user/<name>.json`` as a string."""
        return str(self.paths.app) + "/.user/" + name + ".json"

    def _save_profile_cfg(self, profile=None):
        """Write the in-memory profile settings to that profile's JSON file.

        Args:
            profile: Profile name used for the file name. Defaults to the
                profile manager's current profile.
        """
        if profile is None:
            profile = self.profile_manager.cur_prf
        Config_Manager().save_config(self._user_cfg_path(profile),
                                     self.profile_manager.prf_cfg)

    # ------------------------------------------------------------------
    # GUI construction
    # ------------------------------------------------------------------
    def startup(self):
        """Load the latest profile, build all widgets and show the main window.

        Widgets are added to their boxes in display order, so the statement
        order below is significant for the resulting layout.
        """
        self.logger.info(
            "Starting Application\n------------------------------------------------"
        )
        self.logger.info(str(self.paths.app))
        # Description shown next to the mode dropdown, keyed by mode name.
        self.mode_info = {
            "Overwrite": "Overwrites already replaced faces",
            "Preserve": "Preserves already replaced faces",
            "Generate": "Generates mapping from scratch."
        }
        os.makedirs(str(self.paths.app) + "/.config", exist_ok=True)
        self.logger.info("Loading current profile")
        self.profile_manager = Profile_Manager(
            Config_Manager().get_latest_prf(self._user_cfg_path("cfg")),
            str(self.paths.app))
        self.profile_manager.migrate_config()

        self.logger.info("Creating GUI")
        self.main_box = toga.Box()
        self.logger.info("Created main box")
        label_width = 125

        # TOP Profiles
        prf_box = toga.Box()
        self.logger.info("Created prf_box")
        prf_inp = toga.TextInput()
        self.logger.info("Created prf_inp")
        self.prfsel_box = toga.Box()
        prf_lab = toga.Label(text="Create Profile: ")
        prf_lab.style.update(width=label_width)
        prfsel_lab = toga.Label(text="Select Profile: ")
        prfsel_lab.style.update(width=label_width)
        self.prfsel_lst = SourceSelection(
            items=list(self.profile_manager.config["Profile"].keys()),
            on_select=self._set_profile_status)
        self.prfsel_lst.value = self.profile_manager.cur_prf
        # Default arguments bind the widgets at definition time so the
        # handlers receive the dropdown / text input explicitly.
        prfsel_btn = toga.Button(
            label="Delete",
            on_press=lambda e=None, c=self.prfsel_lst: self._delete_profile(c))
        prf_btn = toga.Button(
            label="Create",
            on_press=lambda e=None, d=prf_inp, c=self.prfsel_lst:
            self._create_profile(d, c))

        self.main_box.add(prf_box)
        prf_box.add(prf_lab)
        prf_box.add(prf_inp)
        prf_box.add(prf_btn)
        prf_lab.style.update(padding_top=7)
        prf_inp.style.update(direction=ROW, padding=(0, 20), flex=1)

        self.main_box.add(self.prfsel_box)
        self.prfsel_box.add(prfsel_lab)
        self.prfsel_box.add(self.prfsel_lst)
        self.prfsel_box.add(prfsel_btn)
        self.prfsel_lst.style.update(direction=ROW, padding=(0, 20), flex=1)
        prfsel_lab.style.update(padding_top=7)

        # MID Path selections
        dir_box = toga.Box()
        dir_lab = toga.Label(text="Select Image Directory: ")
        dir_lab.style.update(width=label_width)
        self.dir_inp = toga.TextInput(
            readonly=True, initial=self.profile_manager.prf_cfg['img_dir'])
        self.dir_inp.style.update(direction=ROW, padding=(0, 20), flex=1)
        self.dir_btn = toga.Button(label="...",
                                   on_press=self.action_select_folder_dialog,
                                   enabled=False)

        rtf_box = toga.Box()
        rtf_lab = toga.Label(text="RTF File: ")
        rtf_lab.style.update(width=label_width)
        self.rtf_inp = toga.TextInput(
            readonly=True, initial=self.profile_manager.prf_cfg['rtf'])
        self.rtf_inp.style.update(direction=ROW, padding=(0, 20), flex=1)
        self.rtf_btn = toga.Button(label="...",
                                   on_press=self.action_open_file_dialog,
                                   enabled=False)

        self.main_box.add(dir_box)
        self.main_box.add(rtf_box)
        dir_box.add(dir_lab)
        dir_box.add(self.dir_inp)
        dir_box.add(self.dir_btn)
        rtf_box.add(rtf_lab)
        rtf_box.add(self.rtf_inp)
        rtf_box.add(self.rtf_btn)
        dir_lab.style.update(padding_top=7)
        rtf_lab.style.update(padding_top=7)

        gen_mode_box = toga.Box()
        self.genmde_lab = toga.Label(text="Mode: ")
        self.genmde_lab.style.update(width=label_width)
        self.genmdeinfo_lab = toga.Label(text=self.mode_info["Generate"])
        self.genmde_lst = SourceSelection(items=list(self.mode_info.keys()),
                                          on_select=self.update_label)
        self.genmde_lst.value = "Generate"
        self.genmde_lst.style.update(direction=ROW, padding=(0, 20), flex=1)
        self.genmde_lab.style.update(padding_top=7)
        self.genmdeinfo_lab.style.update(padding_top=7)

        gen_mode_box.add(self.genmde_lab)
        gen_mode_box.add(self.genmde_lst)
        gen_mode_box.add(self.genmdeinfo_lab)
        self.main_box.add(gen_mode_box)

        # BOTTOM Generation
        gen_box = toga.Box()
        self.gen_btn = toga.Button(label="Replace Faces",
                                   on_press=self._replace_faces,
                                   enabled=False)
        self.gen_btn.style.update(padding_bottom=20)
        self.gen_lab = toga.Label(text="")
        # The project's Progressbar is used instead of toga.ProgressBar; it
        # is handed the label so it can display status text.
        self.gen_prg = Progressbar(label=self.gen_lab)
        gen_box.add(self.gen_btn)
        gen_box.add(self.gen_lab)
        gen_box.add(self.gen_prg)
        self.main_box.add(gen_box)
        self.gen_prg.style.update(width=570, alignment="center")
        self.gen_lab.style.update(padding_top=20,
                                  padding_bottom=20,
                                  width=100,
                                  alignment="center")

        # Report bad image
        rep_box = toga.Box()
        self.rep_lab = toga.Label(text="Player UID: ")
        self.rep_lab.style.update(width=label_width)
        self.rep_inp = toga.TextInput(on_change=self.change_image)
        self.rep_img = toga.ImageView(toga.Image("resources/logo.png"))
        self.rep_img.style.update(height=180)
        self.rep_img.style.update(width=180)
        self.rep_btn = toga.Button(label="Report",
                                   on_press=self.send_report,
                                   enabled=False)

        rep_box.add(self.rep_lab)
        rep_box.add(self.rep_inp)
        rep_box.add(self.rep_img)
        rep_box.add(self.rep_btn)
        self.main_box.add(rep_box)
        self.rep_lab.style.update(padding_top=10)
        self.rep_inp.style.update(direction=ROW, padding=(0, 20), flex=1)

        # END config
        self.prfsel_box.style.update(padding_bottom=20)
        dir_box.style.update(padding_bottom=20)
        prf_box.style.update(padding_bottom=20)
        rtf_box.style.update(padding_bottom=20)
        gen_mode_box.style.update(padding_bottom=20)
        rep_box.style.update(padding_top=20)
        gen_box.style.update(direction=COLUMN, alignment='center')
        self.main_box.style.update(direction=COLUMN,
                                   padding=30,
                                   alignment='center')

        self.main_window = toga.MainWindow(title=self.formal_name,
                                           size=(1000, 600))
        self.main_window.content = self.main_box
        self.main_window.show()

    # ------------------------------------------------------------------
    # Widget state
    # ------------------------------------------------------------------
    def update_label(self, widget):
        """Show the description that belongs to the selected generation mode.

        Args:
            widget: The mode dropdown; ``widget.value`` must be a key of
                ``self.mode_info``.
        """
        self.logger.info("Updating generation label")
        self.genmdeinfo_lab.text = self.mode_info[widget.value]

    def set_btns(self, value):
        """Enable or disable the generate, directory, RTF and report buttons.

        While the current profile is ``"No Profile"`` the buttons are always
        disabled, regardless of ``value``.
        """
        if self.profile_manager.cur_prf == "No Profile":
            value = False
        for btn in (self.gen_btn, self.dir_btn, self.rtf_btn, self.rep_btn):
            btn.enabled = value

    def _set_profile_status(self, e):
        """React to a selection in the profile dropdown.

        A ``None`` selection or re-selecting the current profile is only
        logged; any other value loads that profile, refreshes the path
        inputs and enables the buttons.
        """
        self.logger.info("switch profile: {}".format(e.value))
        if e.value is None:
            self.logger.info("catch none {}".format(
                self.profile_manager.cur_prf))
        elif e.value == self.profile_manager.cur_prf:
            self.logger.info("catch same values")
        else:
            self.profile_manager.load_profile(e.value)
            self._refresh_inp()
            self.set_btns(True)
        # NOTE(review): the global config is persisted on every selection
        # event; confirm this was not meant to run only after a real switch.
        Config_Manager().save_config(self._user_cfg_path("cfg"),
                                     self.profile_manager.config)

    def _refresh_inp(self, clear=False):
        """Sync the directory/RTF inputs with the profile, or clear them.

        Args:
            clear: When true, empty both inputs instead of copying the
                values from the current profile settings.
        """
        self.logger.info("Refresh Input Buttons")
        if clear:
            self.dir_inp.clear()
            self.rtf_inp.clear()
            return
        self.dir_inp.value = self.profile_manager.prf_cfg['img_dir']
        self.rtf_inp.value = self.profile_manager.prf_cfg['rtf']

    # ------------------------------------------------------------------
    # Profile management
    # ------------------------------------------------------------------
    def _create_profile(self, ent, c):
        """Create a profile named after the text input and list it.

        Args:
            ent: Text input holding the new profile name; cleared afterwards.
            c: Profile dropdown that receives the new entry.
        """
        name = ent.value
        self.profile_manager.create_profile(name)
        ent.clear()
        c.add_item(name)

    def _delete_profile(self, c):
        """Delete the profile selected in dropdown ``c`` and reset the UI."""
        prf = c.value
        self.profile_manager.delete_profile(prf)
        c.remove_item(prf)
        self._refresh_inp(True)
        self.set_btns(False)

    # ------------------------------------------------------------------
    # Dialog helpers
    # ------------------------------------------------------------------
    def _throw_error(self, msg):
        """Log ``msg`` and show it in an error dialog titled 'Error'."""
        self.logger.info("Error window {}:".format(msg))
        self.main_window.error_dialog('Error', msg)

    def _show_info(self, msg):
        """Log ``msg`` and show it in an info dialog titled 'Info'."""
        self.logger.info("Info window: {}".format(msg))
        self.main_window.info_dialog("Info", msg)

    def action_select_folder_dialog(self, widget):
        """Let the user pick the image root folder and persist the choice.

        The first selected path, with a trailing ``/`` appended, is written
        to the directory input and to the profile settings, which are then
        saved. Any failure leaves the settings untouched.
        """
        self.logger.info("Select Folder...")
        try:
            path_names = self.main_window.select_folder_dialog(
                title="Select image root folder")
            img_dir = path_names[0] + "/"
            self.dir_inp.value = img_dir
            self.profile_manager.prf_cfg['img_dir'] = img_dir
            self._save_profile_cfg()
        except Exception:
            # Still best-effort (presumably a cancelled dialog ends up here —
            # TODO confirm), but leave a trace instead of failing silently.
            self.logger.info("Folder selection aborted or failed",
                             exc_info=True)

    def action_open_file_dialog(self, widget):
        """Let the user pick the RTF file and persist the choice.

        If the dialog returns ``None`` the stored RTF path is reset to an
        empty string. Exceptions are logged and otherwise ignored.
        """
        self.logger.info("Select File...")
        try:
            fname = self.main_window.open_file_dialog(title="Open RTF file",
                                                      multiselect=False,
                                                      file_types=['rtf'])
            self.logger.info("Created file-dialog")
            rtf_path = "" if fname is None else fname
            self.rtf_inp.value = rtf_path
            self.profile_manager.prf_cfg['rtf'] = rtf_path
            self._save_profile_cfg()
        except Exception:
            self.logger.error("Fatal error in main loop", exc_info=True)

    # ------------------------------------------------------------------
    # Face replacement
    # ------------------------------------------------------------------
    def _replace_faces(self, widget):
        """Run the face-replacement pipeline for the current profile.

        This is a generator handler: it yields ``0.1`` between steps
        (presumably so the toolkit can refresh the UI — confirm against the
        Toga handler documentation). Steps: parse the RTF file, build the
        player-to-image mapping for the selected mode, write the XML, and
        save the profile settings.

        If the RTF file or image directory does not exist, an error dialog
        is shown, the corresponding in-memory setting is reset to ``''`` and
        nothing else happens.
        """
        self.logger.info("Start Replace Faces")
        # get values from UI elements
        rtf = self.profile_manager.prf_cfg['rtf']
        img_dir = self.profile_manager.prf_cfg['img_dir']
        profile = self.profile_manager.cur_prf
        mode = self.genmde_lst.value

        if not os.path.isfile(rtf):
            self._throw_error("The RTF file doesn't exist!")
            self.gen_prg.stop()
            self.profile_manager.prf_cfg['rtf'] = ''
            return
        if not os.path.isdir(img_dir):
            self._throw_error("The image directory doesn't exist!")
            self.gen_prg.stop()
            self.profile_manager.prf_cfg['img_dir'] = ''
            return

        self.logger.info("rtf: {}".format(rtf))
        self.logger.info("img_dir: {}".format(img_dir))
        self.logger.info("profile: {}".format(profile))
        self.logger.info("mode: {}".format(mode))

        self.gen_prg.start()
        try:
            self.gen_prg.update_label("Parsing RTF")
            yield 0.1
            rtf_data = RTF_Parser().parse_rtf(rtf)

            # The values passed to update_progress sum to 100 across steps.
            self.gen_prg.update_progress(20)
            self.gen_prg.update_label("Map player to ethnicity")
            yield 0.1
            mapping_data = Mapper(img_dir,
                                  self.profile_manager).generate_mapping(
                                      rtf_data, mode)

            self.gen_prg.update_progress(60)
            self.gen_prg.update_label("Generate config.xml")
            yield 0.1
            self.profile_manager.write_xml(mapping_data)

            # save profile metadata (used pics and config.xml)
            self.gen_prg.update_label("Save metadata for profile")
            self.gen_prg.update_progress(10)
            yield 0.1
            # Use the profile captured at the start of the run, not whatever
            # is selected by now.
            self._save_profile_cfg(profile)

            self.gen_prg.update_progress(10)
            yield 0.1
            self.gen_prg.update_label("Finished! :)")
            yield 0.1
            self._show_info("Finished! :)")
        finally:
            # Stop the bar even when a step raises, so it is not left running.
            self.gen_prg.stop()

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    def change_image(self, id):
        """Preview the image mapped to the UID typed into the report input.

        Args:
            id: The text input widget; ``id.value`` is the player UID. (The
                name shadows the builtin but is kept for compatibility.)

        On any failure (e.g. unknown UID) the current preview is kept and the
        failure is logged.
        """
        self.logger.info("try to change image preview")
        uid = id.value
        # Profile settings live on the profile manager; this class never
        # defines a ``prf_cfg`` attribute of its own.
        prf_cfg = self.profile_manager.prf_cfg
        try:
            img_name = prf_cfg["imgs"][uid]
            img_eth = prf_cfg["ethnics"][uid]
            img_path = prf_cfg["img_dir"] + "/" + img_eth + "/" + img_name
            self.rep_img.image = toga.Image(img_path)
            self.logger.info("change image preview to: {}".format(img_path))
        except Exception:
            self.logger.info("changing image preview failed!")

    def send_report(self, e):
        """Upload the currently previewed image for the entered UID.

        Sends an embed plus the image file to the webhook and thanks the
        user. If the UID has no entry in the profile settings, an error
        dialog is shown and nothing is sent.
        """
        uid = self.rep_inp.value
        prf_cfg = self.profile_manager.prf_cfg
        try:
            img_name = prf_cfg["imgs"][uid]
            img_eth = prf_cfg["ethnics"][uid]
        except KeyError:
            self._throw_error("No replaced face found for this player UID!")
            return
        img_path = img_eth + "/" + img_name
        img_file = self.rep_img.image.path
        self.logger.info("send report: {}".format(img_file))
        # SECURITY NOTE(review): this webhook URL embeds a secret token and
        # is committed in source; consider loading it from configuration and
        # rotating the token.
        hook = Webhook(
            "https://discord.com/api/webhooks/770397581149863946/Wls0g6LEyTXEpOqzfLn2YuDRKANFSAFpwKe62VL9IxpwsQDWFjYHVfy19hrYiv5p0X2a"
        )
        embed = Embed(
            description='A user reported the following face',
            color=0x5CDBF0,
            timestamp='now'  # sets the timestamp to current time
        )
        file = File(img_file)
        embed.add_field(name='File', value=img_path)
        hook.send(embed=embed, file=file)
        self._show_info("Thanks for Reporting!")
def main(proteome_file, output_dir):
    """Count in how many proteins each k-mer occurs and write a CSV report.

    Reads a FASTA proteome, builds one ``Protein`` per distinct gene name
    (falling back to the UniProt unique identifier when the gene name is
    empty), collects for every k-mer the set of gene names containing it,
    and writes the k-mers shared by at least 10 proteins, most frequent
    first, to ``<output_dir>/frequent k<K>-mers - <timestamp>.csv``.

    The function targets Python 2: the CSV file is opened in ``"wb"`` mode.
    The parenthesised single-argument ``print(...)`` calls produce the same
    output under the Python 2 print statement.

    Args:
        proteome_file: Path of the FASTA file to read.
        output_dir: Directory in which the CSV report is created.
    """
    import csv
    import datetime

    all_proteins = list()  # all_proteins[i] = PROTEIN()_OBJECT

    print("Reading Uniprot file and generating k-mers list for each protein...")
    created_protein_names = set()  # prevent creation of two similar protein objects
    # Open the proteome in a context manager so the handle is closed once
    # parsing is done.
    with open(proteome_file) as proteome_handle:
        for rec in SeqIO.parse(proteome_handle, 'fasta'):
            seq = str(rec.seq)
            uniqueIdentifier, _, _, _, geneName = \
                utils.parse_UniProtKB_header(rec.description)
            if geneName == '':
                # Uncharacterized gene: identify it by its unique identifier.
                geneName = uniqueIdentifier
            if geneName in created_protein_names:
                print('Ignoring duplicate gene: %s' % rec.description)
                continue
            # create a new Protein object
            created_protein_names.add(geneName)
            all_proteins.append(Protein(geneName, seq, params.K))

    total_proteins_read = len(all_proteins)
    print("")
    print("Counted k-mer (k=%d) for %d different genes (proteins)." %
          (params.K, total_proteins_read))

    pb = Progressbar('Generating frequency dictionary for k-mers')
    # A filter that skipped genes whose names start with ZNF/ZF, OR, HOX or
    # IGKV (incrementing skipped_prots) is currently disabled, so
    # skipped_prots stays 0.
    skipped_prots = 0
    kmers_frequency = dict()  # track popularity of kmer across all proteins
    for position, prot in enumerate(all_proteins, 1):
        pb.update_progress(position, total_proteins_read)
        for kmer in prot.kmers:
            kmers_frequency.setdefault(kmer, set()).add(prot.geneName)

    print("Sorting frequent k-mers by frequency...")
    most_frequenct_kmers = sorted(kmers_frequency,
                                  key=lambda k: len(kmers_frequency[k]),
                                  reverse=True)

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    outfile = '{}/frequent k{}-mers - {}.csv'.format(output_dir, params.K,
                                                     timestamp)
    # Denominator for the percentage column; loop-invariant, so compute once.
    counted_proteins = total_proteins_read - skipped_prots
    with open(outfile, "wb") as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow([
            'k-mer', 'number of proteins', 'percentage of total', 'all',
            '(Out of %d proteins in total)' % counted_proteins
        ])
        for kmer in most_frequenct_kmers:
            total_proteins = len(kmers_frequency[kmer])
            if total_proteins < 10:
                # Sorted in descending order: every remaining k-mer is rarer.
                break
            percentage = "{0:.4f}".format(
                float(total_proteins) / counted_proteins)
            geneList = list(kmers_frequency[kmer])
            writer.writerow([kmer, total_proteins, percentage, geneList])
if geneName == '': print 'Ignoring unknown gene: %s' % rec.description continue if geneName in created_protein_names: print 'Ignoring duplicate gene: %s' % rec.description continue # create a new Protein object created_protein_names.add(geneName) protein_seq[geneName] = seq all_proteins.append(Protein(geneName, seq, params.K)) print print "Counted k-mer (k=%d) for %d different genes (proteins)." % ( params.K, len(all_proteins)) pb = Progressbar('Generating frequency dictionary for k-mers') kmers_frequency = dict() # track popularity of kmer accross all proteins i = 0 for prot in all_proteins: i += 1 pb.update_progress(i, len(all_proteins)) for kmer in prot.kmers: if kmer not in kmers_frequency: kmers_frequency[kmer] = set() # add the new protein only if it's dissimilar enough from all other # proteins that were already added and contain this kmer protein_names = kmers_frequency[ kmer] # list of all prots that share this kmer redundantProt = False
def run(self):
    """Create a fresh ``Progressbar`` and call its ``run`` method.

    ``self._sprun`` and ``self.init_delay`` are passed as arguments; the
    result of the call is discarded.
    """
    Progressbar().run(self._sprun, self.init_delay)
) # prevent creation of two similar protein objects for rec in SeqIO.parse(open(params.PROTEOME_FILE), 'fasta'): seq = str(rec.seq) uniqueIdentifier, entryName, proteinName, organismName, geneName = \ utils.parse_UniProtKB_header(rec.description) if geneName == '': print 'Ignoring unknown gene: %s' % rec.description continue if geneName in created_protein_names: print 'Ignoring duplicate gene: %s' % rec.description continue # create a new Protein object created_protein_names.add(geneName) protein_seq[geneName] = seq pb = Progressbar('Diluting similar proteins') diluted_genes_in_connected_component = set() i = 0 for gene1 in genes_in_connected_component: i += 1 pb.update_progress(i, len(genes_in_connected_component)) redundantProt = False for gene2 in diluted_genes_in_connected_component: if not utils.proteins_are_dissimilar(gene1, gene2, protein_seq[gene1], protein_seq[gene2]): redundantProt = True break if not redundantProt: diluted_genes_in_connected_component.add(gene1) print "Total genes in connected component: %s" % len(