def __read_config():
    """Load the global, account, and db YAML configs from the config/ dir.

    Returns:
        tuple: (env, log_switch, accounts, db, log_level, print_switch) —
        the parsed settings values plus the full account and db mappings.
    """
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Resolve the config directory once instead of recomputing
    # realpath+join for each of the three files (original repeated it).
    config_dir = os.path.join(os.path.realpath(base_dir), "config")
    # Absolute path of the global settings file.
    settings_file = os.path.join(config_dir, "settings.yml")
    account_file = os.path.join(config_dir, "account.yml")
    db_file = os.path.join(config_dir, "db.yml")

    settings = FileUtil().connect_to(settings_file).parsed_data
    accounts = FileUtil().connect_to(account_file).parsed_data
    db = FileUtil().connect_to(db_file).parsed_data

    env = settings["env"]
    log_switch = settings["log_switch"]
    log_level = settings["log_level"]
    print_switch = settings["print_switch"]
    return env, log_switch, accounts, db, log_level, print_switch
def setUpClass(cls):
    # Test-suite setup: read the SDK deployment config, build an
    # authenticated MethodContext, create a scratch workspace, and upload
    # the genome / reads / alignment fixtures the tests rely on.
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    # Flatten the [kb_GenomeBrowser] section into a plain dict.
    for nameval in config.items('kb_GenomeBrowser'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'kb_GenomeBrowser',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.serviceImpl = kb_GenomeBrowser(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    # Millisecond timestamp makes the workspace name unique per run so
    # concurrent test runs don't collide.
    suffix = int(time.time() * 1000)
    wsName = "test_GenomeBrowser_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': wsName})  # noqa
    cls.wsName = wsName
    cls.file_util = FileUtil(wsName, cls.wsURL, cls.callback_url)
    # Upload genomes — fixtures are copied into scratch first because the
    # upload helpers presumably expect writable scratch paths (TODO confirm).
    base_gbk_file = "data/at_chrom1_section.gbk"
    gbk_file = os.path.join(cls.scratch, os.path.basename(base_gbk_file))
    shutil.copy(base_gbk_file, gbk_file)
    cls.genome_ref = cls.file_util.load_genbank_file(gbk_file, 'my_test_genome')
    # get gff file
    cls.gff_file = cls.file_util.get_gff_file(cls.genome_ref)
    # get fasta file
    cls.fasta_file = cls.file_util.get_fasta_file(cls.genome_ref)
    # Upload reads
    base_reads_file = "data/extracted_WT_rep1.fastq"
    reads_file = os.path.join(cls.scratch, os.path.basename(base_reads_file))
    shutil.copy(base_reads_file, reads_file)
    cls.reads_ref = cls.file_util.load_reads_file("illumina", reads_file, None, "my_reads_lib")
    # Upload alignments
    base_align_file = "data/at_chr1_wt_rep1_hisat2.bam"
    cls.bam_file = os.path.join(cls.scratch, os.path.basename(base_align_file))
    shutil.copy(base_align_file, cls.bam_file)
    cls.alignment_ref = cls.file_util.load_bam_file(cls.bam_file, cls.genome_ref, cls.reads_ref, 'my_hisat2_alignment')
def test_get_html(self):
    """Check that HtmlRequester.get_html fetches the expected page body."""
    expected = FileUtil().get_file_contents("example.html")
    fetched = HtmlRequester().get_html("http://example.org")
    self.assertEqual(expected, fetched)
def test_get_web_pages(self):
    """Check that HtmlParser.get_web_pages matches the recorded fixture."""
    files = FileUtil()
    parser = HtmlParser()
    expected = files.get_file_contents("web_pages_test_data.txt")
    urls = files.get_file_contents("same_hostname_urls_test_data.txt")
    self.assertEqual(expected, parser.get_web_pages(urls))
def test_get_links(self):
    """Check that HtmlParser.get_links matches the recorded fixture."""
    files = FileUtil()
    parser = HtmlParser()
    expected = files.get_file_contents("links_test_data.txt")
    page = files.get_file_contents("html_test_data.html")
    self.assertEqual(expected, parser.get_links(page))
def test_get_same_hostname_urls(self):
    """Check hostname filtering of links against the recorded fixture."""
    files = FileUtil()
    parser = HtmlParser()
    expected = files.get_file_contents("same_hostname_urls_test_data.txt")
    links = files.get_file_contents("links_test_data.txt")
    host = "http://www.domain.com/"
    self.assertEqual(expected, parser.get_same_hostname_urls(host, links))
def mock_get_html(self, url):
    """Stand-in for html_requester.get_html.

    Serves the contents of html_test_data.html for the single URL the
    tests crawl, and an empty string for every other URL, so test HTML
    can be injected without hosting it online.
    """
    if url != "http://www.domain.com":
        return ""
    return FileUtil().get_file_contents("html_test_data.html")
def link_features(path):
    """Symlink every '*.features.csv' datum under *path* into /pfs/out/.

    Args:
        path: Root directory handed to FileUtil for discovery.
    """
    util = FileUtil(path)
    util.walk()
    for key in util.datums:
        fname = util.datums[key].split('/')[-1]
        # Guard clause instead of nesting the whole body in an if.
        if '.features.csv' not in fname:
            continue
        infile = util.datums[key]
        outfile = '/pfs/out/' + fname
        try:
            os.symlink(infile, outfile)
        # BUG fixed: bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; os.symlink failures raise OSError.
        except OSError:
            print('Cannot create sim-link', infile, outfile)
def process(repoA, repoB):
    # Pairwise record-linkage pass: walk the datums of both repos, score
    # every (fileA, fileB) pair, and symlink matching pairs into a fresh
    # temp directory under /pfs/out/.
    # NOTE(review): reconstructed from a collapsed one-line source —
    # nesting is a best guess; confirm against the original file.
    utilA = FileUtil(repoA)
    utilB = FileUtil(repoB)
    utilA.walk()
    utilB.walk()
    data = []
    for i, keyA in enumerate(utilA.datums.keys()):
        for j, keyB in enumerate(utilB.datums.keys()):
            fileA = utilA.datums[keyA]
            fileB = utilB.datums[keyB]
            # NOTE(review): `data` is never cleared between pairs, so every
            # iteration re-scores rows accumulated from all previous pairs
            # as well — verify this accumulation is intentional.
            get_data(fileA, data)
            get_data(fileB, data)
            blocking_zip(data, 0)
            if (blocking(data, 0) is True):
                nameA = 'recordA' + '_' + str(i)
                nameB = 'recordB' + '_' + str(j)
                # mkdtemp both creates and names the per-match output dir.
                tmpname = tempfile.mkdtemp(prefix='/pfs/out/')
                print('Files to link: ', fileA, os.path.join(tmpname, nameA))
                print('Files to link: ', fileB, os.path.join(tmpname, nameB))
                os.symlink(fileA, os.path.join(tmpname, nameA))
                os.symlink(fileB, os.path.join(tmpname, nameB))
def load_data(path, dsetname=''):
    """Load the first matching CSV datum under *path* as a DataFrame.

    Args:
        path: Root directory handed to FileUtil for discovery.
        dsetname: When non-empty, only a datum whose basename equals this
            name is loaded; when empty, the first datum found is loaded.

    Returns:
        pandas.DataFrame indexed by "record_id", or None when no datum
        matched.  (The original fell through to `return data` with `data`
        unbound in that case, raising NameError.)
    """
    util = FileUtil(path)
    util.walk()
    for key in util.datums:
        if dsetname:
            fname = util.datums[key].split('/')[-1]
            if fname != dsetname:
                continue
        print('Load ', key)
        data = pd.read_csv(util.datums[key], index_col="record_id")
        # Record the column schema alongside the datum key.
        write_meta(list(data.columns.values), key)
        return data
    # BUG fixed: no matching datum previously raised NameError.
    return None
def test_crawl(self):
    """Check the full crawl output against the recorded fixture.

    get_html is replaced with mock_get_html so the crawler consumes the
    contents of html_test_data.html instead of a live site.
    """
    expected = FileUtil().get_file_contents("crawl_test_data.txt")
    crawler = WebCrawler()
    crawler.html_requester.get_html = lambda url: self.mock_get_html(url)
    result = crawler.crawl("http://www.domain.com")
    self.assertEqual(expected, result)
def load_data(path, dsetname=''):
    """Copy datums under *path* whose basename contains *dsetname* to /pfs/out/.

    Args:
        path: Root directory handed to FileUtil for discovery.
        dsetname: Substring a datum's basename must contain to be copied;
            when empty, nothing is copied.

    Returns:
        None.  (The original ended with `return data` where `data` was
        only ever assigned in commented-out code — a guaranteed NameError.)
    """
    util = FileUtil(path)
    util.walk()
    data = None
    for key in util.datums:
        if len(dsetname) > 0:
            fname = util.datums[key].split('/')[-1]
            if dsetname in fname:
                print('Load ', key)
                # BUG fixed: copyfile was called with its destination
                # argument missing (`copyfile(src, )` -> TypeError).
                # Mirrors the sibling helpers that publish datums to
                # /pfs/out/ under the same basename — TODO confirm the
                # intended destination.
                copyfile(util.datums[key], '/pfs/out/' + fname)
            else:
                print(dsetname, key)
    return data
def get_data(path):
    """Gather all CSV rows from the datums under *path*; merge and write
    them under a name built from the concatenated datum keys, or report
    when nothing was collected."""
    util = FileUtil(path)
    util.walk()
    rows = []
    name = ''
    for key in util.datums:
        print(key, util.datums[key])
        name += key
        with open(util.datums[key], 'r') as datum:
            rows.extend(csv.reader(datum))
    if rows:
        write(name, merge_data(rows))
    else:
        print('No datums collected')
def validate_meta(path):
    # Verify that every '.meta.' datum under *path* holds identical rows.
    # Returns the shared meta header row, or [] as soon as any row differs.
    # NOTE(review): reconstructed from a collapsed one-line source — the
    # placement of the with-block relative to the two loops is a best
    # guess; confirm against the original file.
    util = FileUtil(path)
    util.walk()
    meta = []
    for key in util.datums:
        parts = key.split('.')
        for part in parts:
            # Skip non-'meta' name components; only datums with a 'meta'
            # segment in their key are compared.
            if part != 'meta':
                continue
            with open(path + '/' + key, 'r') as f:
                reader = csv.reader(f)
                # First meta file seen becomes the reference row.
                if (len(meta) == 0):
                    meta = next(reader)
                for row in reader:
                    if meta != row:
                        print('Meta data does not match', meta, row)
                        return []
    return meta
def load_processed(path, dsetname, columns, dtypes):
    """Load a processed (header-less) CSV datum and normalize its schema.

    Args:
        path: Root directory handed to FileUtil for discovery.
        dsetname: When non-empty, only a datum whose basename equals this
            name is loaded.
        columns: Column names to apply after dropping the second column.
        dtypes: Mapping of column name -> dtype to cast.

    Returns:
        pandas.DataFrame indexed by 'rec_id', or None when no datum
        matched.
    """
    util = FileUtil(path)
    util.walk()
    for key in util.datums:
        if len(dsetname) > 0:
            fname = util.datums[key].split('/')[-1]
            if fname != dsetname:
                continue
        print('Load ', key)
        data = pd.read_csv(util.datums[key], header=None)
        # Drop the second raw column before applying the real schema.
        data = data.drop(data.columns[1], axis=1)
        data.columns = columns
        # BUG fixed: the inner loop reused `key`, shadowing the outer
        # loop variable; renamed to `col`.
        for col in dtypes:
            data[col] = data[col].astype(dtypes[col])
        data = data.set_index('rec_id')
        return data
    return None
def merge(pathA, dsetnameA='', outpath='./'):
    '''
    Following method provides example linking of two datasets.

    Copies the datum whose basename contains *dsetnameA* to
    /pfs/out/output_valid.csv and symlinks any '*.features.csv' datum
    into /pfs/out/.

    Args:
        pathA: Root directory handed to FileUtil for discovery.
        dsetnameA: Substring selecting which datum to copy; '' selects none.
        outpath: Unused by the current implementation (kept for
            interface compatibility).
    '''
    # Load the datasets dataframes
    util = FileUtil(pathA)
    util.walk()
    for key in util.datums:
        if len(dsetnameA) > 0:
            fname = util.datums[key].split('/')[-1]
            if dsetnameA in fname:
                print('Copy ', fname)
                copyfile(util.datums[key], '/pfs/out/output_valid.csv')
            if '.features.csv' in fname:
                infile = util.datums[key]
                outfile = '/pfs/out/' + fname
                try:
                    os.symlink(infile, outfile)
                # BUG fixed: bare `except:` also swallowed SystemExit and
                # KeyboardInterrupt; os.symlink failures raise OSError.
                except OSError:
                    print('Cannot create sim-link', infile, outfile)
def load_original(path, dsetname=''):
    """Load the original (raw) CSV datum under *path* with a fixed schema.

    Args:
        path: Root directory handed to FileUtil for discovery.
        dsetname: When non-empty, only a datum whose basename equals this
            name is loaded; when empty, the first datum found is loaded.

    Returns:
        pandas.DataFrame indexed by "rec_id", or None when no datum
        matched.  (The original raised NameError in that case.)
    """
    util = FileUtil(path)
    util.walk()
    for key in util.datums:
        if dsetname:
            fname = util.datums[key].split('/')[-1]
            if fname != dsetname:
                continue
        print('Load ', key)
        data = pd.read_csv(
            util.datums[key],
            index_col="rec_id",
            sep=",",
            engine='c',
            skipinitialspace=True,
            encoding='utf-8',
            # Numeric-looking identifiers are forced to stable dtypes so
            # joins against processed data behave predictably.
            dtype={
                "given_name": str,
                "street_number": float,
                "date_of_birth": float,
                "soc_sec_id": int,
                "postcode": float
            })
        return data
    # BUG fixed: no matching datum previously raised NameError.
    return None
def plot_simulations(folder_path, n_modes=3, n_simulations=10):
    # For each reward type, aggregate per-mode simulation logs and plot
    # score and cumulative-reward curves over episodes.
    # NOTE(review): relies on module-level `reward_types` and `modes`;
    # reformatted from a collapsed one-line source — nesting is a best
    # guess, confirm against the original file.
    for reward_type in reward_types:
        scores = []
        rewards = []
        pos_rewards = []
        neg_rewards = []
        episodes = []
        for mode in modes:
            file_util = FileUtil(folder_path)
            s_data, r_data, r_pos_data, r_neg_data = file_util.read_files(reward_type, mode)
            s_sum, episodes, r_sum, r_episodes = summarize_simulations(s_data, r_data, n_simulations=n_simulations)
            # np.append flattens: per-mode series are concatenated 1-D and
            # reshaped into (mode, episode) form after the loop.
            scores = np.append(scores, s_sum)
            rewards = np.append(rewards, r_sum)
            if len(r_pos_data) > 0:
                r_pos_sum, pos_episodes = summarize_reward_simulations(r_pos_data, n_simulations=n_simulations)
                pos_rewards = np.append(pos_rewards, r_pos_sum)
            if len(r_neg_data) > 0:
                r_neg_sum, neg_episodes = summarize_reward_simulations(r_neg_data, n_simulations=n_simulations)
                neg_rewards = np.append(neg_rewards, r_neg_sum)
        if n_modes > 0:
            shape = (n_modes, len(episodes))
            scores = scores.reshape(shape)
            rewards = rewards.reshape(shape)
            # Positive/negative splits exist for one fewer mode than the
            # combined series — presumably the "Both" mode has no split;
            # TODO confirm.
            if len(pos_rewards) > 0:
                shape = (n_modes-1, len(episodes))
                pos_rewards = pos_rewards.reshape(shape)
            if len(neg_rewards) > 0:
                shape = (n_modes-1, len(episodes))
                neg_rewards = neg_rewards.reshape(shape)
        reward_title, score_title = map_reward_type_to_title(reward_type)
        # create_title(reward_type, mode)
        # --- Plot 1: hit scores per mode ---
        plt.clf()
        # plt.plot(episodes, scores, label="Both")
        plt.plot(episodes, scores[0], label="Both")
        if n_modes >= 2:
            plt.plot(episodes, scores[1], label="Negative")
        if n_modes >= 3:
            plt.plot(episodes, scores[2], label="Positive")
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0), ncol=3,
                   fancybox=True, shadow=False)
        plt.xlim(xmin=0, xmax=1000)
        plt.ylim(ymin=0, ymax=100)
        plt.ylabel('n Hits (max=100)')
        plt.xlabel('Episodes')
        plt.title(score_title)
        plt.show()
        # --- Plot 2: cumulative reward per mode ---
        plt.clf()
        # plt.plot(episodes, rewards, label="Both")
        plt.plot(episodes, rewards[0], label="Both")
        if n_modes >= 2:
            plt.plot(episodes, rewards[1], label="Negative")
        if n_modes >= 3:
            plt.plot(episodes, rewards[2], label="Positive")
        plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0), ncol=3,
                   fancybox=True, shadow=False)
        plt.xlim(xmin=0, xmax=1000)
        plt.ylabel('Cumulative reward')
        plt.xlabel('Episodes')
        plt.title(reward_title)
        plt.show()
        # --- Plot 3: cumulative positive reward (when present) ---
        if len(pos_rewards) > 0:
            plt.clf()
            # plt.plot(episodes, rewards, label="Both")
            if n_modes - 1 >= 1:
                plt.plot(episodes, pos_rewards[0], label="Both")
            if n_modes - 1 >= 2:
                plt.plot(episodes, pos_rewards[1], label="Positive")
            plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0), ncol=3,
                       fancybox=True, shadow=False)
            plt.xlim(xmin=0, xmax=1000)
            plt.ylabel('Cumulative positive reward')
            plt.xlabel('Episodes')
            plt.title(reward_title)
            plt.show()
        # --- Plot 4: cumulative negative reward (when present) ---
        if len(neg_rewards) > 0:
            plt.clf()
            # plt.plot(episodes, rewards, label="Both")
            if n_modes - 1 >= 1:
                plt.plot(episodes, neg_rewards[0], label="Both")
            if n_modes - 1 >= 2:
                plt.plot(episodes, neg_rewards[1].flatten(), label="Negative")
            plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0), ncol=3,
                       fancybox=True, shadow=False)
            plt.xlim(xmin=0, xmax=1000)
            plt.ylabel('Cumulative negative reward')
            plt.xlabel('Episodes')
            plt.title(reward_title)
            plt.show()
from modes_and_types import *
from file_util import FileUtil

# Pong-style RL simulation driver: runs n_simulations per
# (reward_type, mode) combination, recording results via FileUtil.
# NOTE(review): this chunk appears truncated — the per-simulation loop
# body continues beyond the visible source.

# reward_types = all_rewards
reward_types = [RewardType.TRACKING]
modes = all_modes
n_simulations = 10
frames_per_second = 20000
num_episodes = 1000
episode_length = 100
slow_down_on_last_100 = False

for reward_type in reward_types:
    for mode in modes:
        file_util = FileUtil('sim_data/rev3/')
        for i_simulation in range(1, n_simulations + 1):
            sim_start = time()
            print(f'Simulation: {i_simulation}')
            pygame.init()
            # Define some colors
            BLACK = (0, 0, 0)
            WHITE = (255, 255, 255)
            # Open a new window
            x = 140
            y = 150
            paddle_width = 5
            paddle_height = 20
from file_util import FileUtil # reward_types = all_rewards reward_types = [RewardType.TRACKING] modeA = Mode.BOTH modeB = Mode.BOTH reward_typeA = RewardType.TRACKING_PROPORTIONAL_UNIDIRECTIONAL reward_typeB = RewardType.TRACKING_PROPORTIONAL_UNIDIRECTIONAL_WEIGHTED n_simulations = 5 frames_per_second = 20000 num_episodes = 1000 episode_length = 100 slow_down_on_last_100 = False file_util = FileUtil('sim_data/test/') for i_simulation in range(1, n_simulations + 1): sim_start = time() print(f'Simulation: {i_simulation}') pygame.init() # Define some colors BLACK = (0, 0, 0) WHITE = (255, 255, 255) # Open a new window x = 140 y = 150 paddle_width = 5 paddle_height = 20
def __init__(self, old_file, new_file, filename=""):
    """Remember the two files being compared; when *filename* is given,
    also build a FileUtil for it and read its structure."""
    self.old_file = old_file
    self.new_file = new_file
    if filename:
        self.fu = FileUtil(filename)
        self.fu.get_structure()